From 0dc9ba5c3a168b9ae1573a97bd20fe1388f3c83d Mon Sep 17 00:00:00 2001 From: "kaf24@labyrinth.cl.cam.ac.uk" Date: Sun, 23 Feb 2003 11:22:39 +0000 Subject: [PATCH] bitkeeper revision 1.89 (3e58aeffGldVBDUrCPQ3AX0iBm4OYw) Many files: Ripped out lots of code from Xen's network transmit path, pending a new scheduler. devinit.c: Rename: xen-2.4.16/net/sch_generic.c -> xen-2.4.16/net/devinit.c sch_generic.c: Rename: BitKeeper/deleted/.del-sch_generic.c~c8d7d9959cc80952 -> xen-2.4.16/net/sch_generic.c .del-sch_generic.c~c8d7d9959cc80952: Delete: xen-2.4.16/net/sch_generic.c .del-utils.c~e7553afc72d1b648: Delete: xen-2.4.16/net/utils.c .del-pkt_sched.h~e13e384d6b974c61: Delete: xen-2.4.16/include/xeno/pkt_sched.h --- .rootkeys | 4 +- xen-2.4.16/common/network.c | 27 +- xen-2.4.16/drivers/net/net_init.c | 10 +- xen-2.4.16/drivers/scsi/aacraid/aacraid.h | 11 +- xen-2.4.16/drivers/scsi/aacraid/commsup.c | 18 +- xen-2.4.16/drivers/scsi/aacraid/rx.c | 10 +- xen-2.4.16/include/xeno/if_vlan.h | 10 + xen-2.4.16/include/xeno/interrupt.h | 18 +- xen-2.4.16/include/xeno/netdevice.h | 84 +- xen-2.4.16/include/xeno/pkt_sched.h | 816 ------- xen-2.4.16/include/xeno/skbuff.h | 51 +- xen-2.4.16/include/xeno/sockios.h | 3 - xen-2.4.16/include/xeno/vif.h | 9 +- xen-2.4.16/net/dev.c | 2406 ++++++++++----------- xen-2.4.16/net/devinit.c | 114 + xen-2.4.16/net/sch_generic.c | 525 ----- xen-2.4.16/net/skbuff.c | 47 +- xen-2.4.16/net/utils.c | 75 - 18 files changed, 1361 insertions(+), 2877 deletions(-) delete mode 100644 xen-2.4.16/include/xeno/pkt_sched.h create mode 100644 xen-2.4.16/net/devinit.c delete mode 100644 xen-2.4.16/net/sch_generic.c delete mode 100644 xen-2.4.16/net/utils.c diff --git a/.rootkeys b/.rootkeys index 2ec1d66898..46ccb7b3da 100644 --- a/.rootkeys +++ b/.rootkeys @@ -260,7 +260,6 @@ 3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen-2.4.16/include/xeno/pci_ids.h 3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen-2.4.16/include/xeno/perfc.h 3e54c38de9SUSYSAwxDf_DwkpAnQFA
xen-2.4.16/include/xeno/perfc_defn.h -3ddb79c2byJwwNNkiES__A9H4Cvc4g xen-2.4.16/include/xeno/pkt_sched.h 3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen-2.4.16/include/xeno/prefetch.h 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen-2.4.16/include/xeno/reboot.h 3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen-2.4.16/include/xeno/sched.h @@ -279,10 +278,9 @@ 3ddb79c4YQCQ6r0xNLLu0jfbM7pVmA xen-2.4.16/net/Makefile 3ddb79c4AkfDkTCw0comx4L8wsUOMg xen-2.4.16/net/dev.c 3ddb79c4x1L_soh8b-r_1jQW_37Icw xen-2.4.16/net/dev_mcast.c +3ddb79c4KZhNxUuYJ7lul8cc-wRkyg xen-2.4.16/net/devinit.c 3ddb79c4NSDwiQ-AmrYdxcRAwLPzwQ xen-2.4.16/net/eth.c -3ddb79c4KZhNxUuYJ7lul8cc-wRkyg xen-2.4.16/net/sch_generic.c 3ddb79c4TZj1wXPKQt36O72SddtBNQ xen-2.4.16/net/skbuff.c -3ddb79c4ARyIHqv3Y6YFckIUbyA8Tw xen-2.4.16/net/utils.c 3ddb79c4x8dvwPtzclghWAKFWpEBFA xen-2.4.16/tools/Makefile 3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen-2.4.16/tools/elf-reloc.c 3ddb79bbYMXGmQTsr5BeGS_RuZ5f_w xenolinux-2.4.16-sparse/Makefile diff --git a/xen-2.4.16/common/network.c b/xen-2.4.16/common/network.c index fea01b326e..84b201f94c 100644 --- a/xen-2.4.16/common/network.c +++ b/xen-2.4.16/common/network.c @@ -54,29 +54,27 @@ net_vif_t *create_net_vif(int domain) net_shadow_ring_t *shadow_ring; struct task_struct *dom_task; - if ( !(dom_task = find_domain_by_id(domain)) ) - { - return NULL; - } + if ( !(dom_task = find_domain_by_id(domain)) ) + return NULL; if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL ) - { - return NULL; - } + return NULL; new_ring = dom_task->net_ring_base + dom_task->num_net_vifs; memset(new_ring, 0, sizeof(net_ring_t)); shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL); - if (shadow_ring == NULL) goto fail; + if ( shadow_ring == NULL ) goto fail; shadow_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_shadow_entry_t), GFP_KERNEL); - if ( shadow_ring->rx_ring == NULL ) + shadow_ring->tx_ring = kmalloc(TX_RING_SIZE + * sizeof(tx_shadow_entry_t), GFP_KERNEL); + if ( (shadow_ring->rx_ring == NULL) || 
(shadow_ring->tx_ring == NULL) ) goto fail; shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0; - shadow_ring->tx_cons = 0; + shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0; /* Fill in the new vif struct. */ @@ -98,7 +96,13 @@ net_vif_t *create_net_vif(int domain) return new_vif; fail: - printk("VIF allocation failed!\n"); + kmem_cache_free(net_vif_cache, new_vif); + if ( shadow_ring != NULL ) + { + if ( shadow_ring->rx_ring ) kfree(shadow_ring->rx_ring); + if ( shadow_ring->tx_ring ) kfree(shadow_ring->tx_ring); + kfree(shadow_ring); + } return NULL; } @@ -125,6 +129,7 @@ void destroy_net_vif(struct task_struct *p) sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed write_unlock(&sys_vif_lock); + kfree(p->net_vif_list[i]->shadow_ring->tx_ring); kfree(p->net_vif_list[i]->shadow_ring->rx_ring); kfree(p->net_vif_list[i]->shadow_ring); kmem_cache_free(net_vif_cache, p->net_vif_list[i]); diff --git a/xen-2.4.16/drivers/net/net_init.c b/xen-2.4.16/drivers/net/net_init.c index a0c1f01002..3081ec22b3 100644 --- a/xen-2.4.16/drivers/net/net_init.c +++ b/xen-2.4.16/drivers/net/net_init.c @@ -423,8 +423,7 @@ void ether_setup(struct net_device *dev) dev->hard_header_len = ETH_HLEN; dev->mtu = 1500; /* eth_mtu */ dev->addr_len = ETH_ALEN; - dev->tx_queue_len = 100; /* Ethernet wants good queues */ - + memset(dev->broadcast,0xFF, ETH_ALEN); /* New-style flags. 
*/ @@ -449,7 +448,6 @@ void fddi_setup(struct net_device *dev) dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ dev->addr_len = FDDI_K_ALEN; - dev->tx_queue_len = 100; /* Long queues on FDDI */ memset(dev->broadcast, 0xFF, FDDI_K_ALEN); @@ -482,9 +480,8 @@ void hippi_setup(struct net_device *dev) dev->hard_header_len = HIPPI_HLEN; dev->mtu = 65280; dev->addr_len = HIPPI_ALEN; - dev->tx_queue_len = 25 /* 5 */; - memset(dev->broadcast, 0xFF, HIPPI_ALEN); + memset(dev->broadcast, 0xFF, HIPPI_ALEN); /* * HIPPI doesn't support broadcast+multicast and we only use @@ -523,7 +520,6 @@ void ltalk_setup(struct net_device *dev) dev->hard_header_len = LTALK_HLEN; dev->mtu = LTALK_MTU; dev->addr_len = LTALK_ALEN; - dev->tx_queue_len = 10; dev->broadcast[0] = 0xFF; @@ -594,7 +590,6 @@ void tr_setup(struct net_device *dev) dev->hard_header_len = TR_HLEN; dev->mtu = 2000; dev->addr_len = TR_ALEN; - dev->tx_queue_len = 100; /* Long queues on tr */ memset(dev->broadcast,0xFF, TR_ALEN); @@ -671,7 +666,6 @@ void fc_setup(struct net_device *dev) dev->hard_header_len = FC_HLEN; dev->mtu = 2024; dev->addr_len = FC_ALEN; - dev->tx_queue_len = 100; /* Long queues on fc */ memset(dev->broadcast,0xFF, FC_ALEN); diff --git a/xen-2.4.16/drivers/scsi/aacraid/aacraid.h b/xen-2.4.16/drivers/scsi/aacraid/aacraid.h index cbfee8aa01..1f9838436d 100644 --- a/xen-2.4.16/drivers/scsi/aacraid/aacraid.h +++ b/xen-2.4.16/drivers/scsi/aacraid/aacraid.h @@ -6,10 +6,10 @@ #include -#define TRY_SOFTIRQ -#ifdef TRY_SOFTIRQ +#define TRY_TASKLET +#ifdef TRY_TASKLET /* XXX SMH: trying to use softirqs to trigger stuff done prev by threads */ -#include /* for softirq stuff */ +#include /* for tasklet/softirq stuff */ #endif /*------------------------------------------------------------------------------ @@ -1408,8 +1408,9 @@ int aac_rx_init(struct aac_dev *dev, unsigned long devNumber); int 
aac_sa_init(struct aac_dev *dev, unsigned long devNumber); unsigned int aac_response_normal(struct aac_queue * q); unsigned int aac_command_normal(struct aac_queue * q); -#ifdef TRY_SOFTIRQ -int aac_command_thread(struct softirq_action *h); +#ifdef TRY_TASKLET +extern struct tasklet_struct aac_command_tasklet; +int aac_command_thread(unsigned long data); #else int aac_command_thread(struct aac_dev * dev); #endif diff --git a/xen-2.4.16/drivers/scsi/aacraid/commsup.c b/xen-2.4.16/drivers/scsi/aacraid/commsup.c index a310fe4499..7d84ad241c 100644 --- a/xen-2.4.16/drivers/scsi/aacraid/commsup.c +++ b/xen-2.4.16/drivers/scsi/aacraid/commsup.c @@ -39,7 +39,7 @@ #include #include -#include // for softirq stuff +#include /* tasklet stuff */ /* #include */ /* #include */ @@ -529,10 +529,15 @@ int fib_send(u16 command, struct fib * fibptr, unsigned long size, int priority #if 0 down(&fibptr->event_wait); #endif -#ifdef TRY_SOFTIRQ +#ifdef TRY_TASKLET + /* + * XXX KAF: Well, this is pretty gross. We should probably + * do_softirq() after scheduling the tasklet, as long as we + * are _sure_ we hold no locks here... + */ printk("about to softirq aac_command_thread...\n"); while (!fibptr->done) { - raise_softirq(SCSI_LOW_SOFTIRQ); + tasklet_schedule(&aac_command_tasklet); mdelay(100); } printk("back from softirq cmd thread and fibptr->done!\n"); @@ -837,13 +842,14 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) * more FIBs. 
*/ -#ifndef TRY_SOFTIRQ +#ifndef TRY_TASKLET +DECLARE_TASKLET_DISABLED(aac_command_tasklet, aac_command_thread, 0); int aac_command_thread(struct aac_dev * dev) { #else -int aac_command_thread(struct softirq_action *h) +int aac_command_thread(unsigned long data) { - struct aac_dev *dev = (struct aac_dev *)h->data; + struct aac_dev *dev = (struct aac_dev *)data; #endif struct hw_fib *fib, *newfib; struct fib fibptr; /* for error logging */ diff --git a/xen-2.4.16/drivers/scsi/aacraid/rx.c b/xen-2.4.16/drivers/scsi/aacraid/rx.c index 4ee6db4900..e79ad49e74 100644 --- a/xen-2.4.16/drivers/scsi/aacraid/rx.c +++ b/xen-2.4.16/drivers/scsi/aacraid/rx.c @@ -437,16 +437,16 @@ int aac_rx_init(struct aac_dev *dev, unsigned long num) if (aac_init_adapter(dev) == NULL) return -1; -#if 0 +#ifdef TRY_TASKLET + aac_command_tasklet.data = (unsigned long)dev; + tasklet_enable(&aac_command_tasklet); +#else /* * Start any kernel threads needed */ dev->thread_pid = kernel_thread((int (*)(void *))aac_command_thread, dev, 0); -#else - /* XXX SMH: just put in a softirq handler instead... */ - open_softirq(SCSI_LOW_SOFTIRQ, aac_command_thread, dev); -#endif +#endif /* * Tell the adapter that all is configured, and it can start diff --git a/xen-2.4.16/include/xeno/if_vlan.h b/xen-2.4.16/include/xeno/if_vlan.h index c8c0903e52..d3e96bc4cf 100644 --- a/xen-2.4.16/include/xeno/if_vlan.h +++ b/xen-2.4.16/include/xeno/if_vlan.h @@ -135,12 +135,19 @@ struct vlan_skb_tx_cookie { u32 vlan_tag; }; +#if 0 #define VLAN_TX_COOKIE_MAGIC 0x564c414e /* "VLAN" in ascii. */ #define VLAN_TX_SKB_CB(__skb) ((struct vlan_skb_tx_cookie *)&((__skb)->cb[0])) #define vlan_tx_tag_present(__skb) \ (VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC) #define vlan_tx_tag_get(__skb) (VLAN_TX_SKB_CB(__skb)->vlan_tag) +#else /* XXX KAF: We don't support vlan tagging at the moment. 
*/ +#define VLAN_TX_SKB_CB(__skb) NULL +#define vlan_tx_tag_present(__skb) 0 +#define vlan_tx_tag_get(__skb) 0 +#endif +#if 0 /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, @@ -203,6 +210,9 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, { return __vlan_hwaccel_rx(skb, grp, vlan_tag, 1); } +#else +#define vlan_hwaccel_rx(_skb, _grp, _tag) (netif_rx(_skb)) +#endif #endif /* __KERNEL__ */ /* VLAN IOCTLs are found in sockios.h */ diff --git a/xen-2.4.16/include/xeno/interrupt.h b/xen-2.4.16/include/xeno/interrupt.h index 81778b6d93..b018eb3de7 100644 --- a/xen-2.4.16/include/xeno/interrupt.h +++ b/xen-2.4.16/include/xeno/interrupt.h @@ -27,19 +27,8 @@ struct irqaction { enum { TIMER_BH = 0, TQUEUE_BH, - DIGI_BH, - SERIAL_BH, - RISCOM8_BH, - SPECIALIX_BH, - AURORA_BH, - ESP_BH, SCSI_BH, - IMMEDIATE_BH, - CYCLADES_BH, - CM206_BH, - JS_BH, - MACSERIAL_BH, - ISICOM_BH + IMMEDIATE_BH }; #include @@ -56,10 +45,7 @@ enum { enum { HI_SOFTIRQ=0, - NET_TX_SOFTIRQ, - NET_RX_SOFTIRQ, - TASKLET_SOFTIRQ, - SCSI_LOW_SOFTIRQ, + TASKLET_SOFTIRQ }; /* softirq mask and active fields moved to irq_cpustat_t in diff --git a/xen-2.4.16/include/xeno/netdevice.h b/xen-2.4.16/include/xeno/netdevice.h index 48c7fae3e9..03169af14b 100644 --- a/xen-2.4.16/include/xeno/netdevice.h +++ b/xen-2.4.16/include/xeno/netdevice.h @@ -25,17 +25,18 @@ #ifndef _LINUX_NETDEVICE_H #define _LINUX_NETDEVICE_H -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include #ifdef __KERNEL__ -#include +#include struct divert_blk; struct vlan_group; @@ -65,28 +66,6 @@ struct vlan_group; #define MAX_ADDR_LEN 8 /* Largest hardware address length */ -/* - * Compute the worst case header length according to the protocols - * used. 
- */ - -#if !defined(CONFIG_AX25) && !defined(CONFIG_AX25_MODULE) && !defined(CONFIG_TR) -#define LL_MAX_HEADER 32 -#else -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) -#define LL_MAX_HEADER 96 -#else -#define LL_MAX_HEADER 48 -#endif -#endif - -#if !defined(CONFIG_NET_IPIP) && \ - !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) -#define MAX_HEADER LL_MAX_HEADER -#else -#define MAX_HEADER (LL_MAX_HEADER + 48) -#endif - /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -141,8 +120,8 @@ enum { extern const char *if_port_text[]; -#include -#include +#include +#include struct neighbour; struct neigh_parms; @@ -311,18 +290,6 @@ struct net_device void *dn_ptr; /* DECnet specific data */ void *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ -#if 0 - /* IAP: add fields but - nothing else */ - struct list_head poll_list; /* Link to poll list */ - int quota; - int weight; -#endif - struct Qdisc *qdisc; - struct Qdisc *qdisc_sleeping; - struct Qdisc *qdisc_list; - struct Qdisc *qdisc_ingress; - unsigned long tx_queue_len; /* Max frames per queue allowed */ /* hard_start_xmit synchronizer */ spinlock_t xmit_lock; @@ -425,12 +392,10 @@ struct packet_type }; -#include -//#include +#include -extern struct net_device loopback_dev; /* The loopback */ -extern struct net_device *dev_base; /* All devices */ -extern rwlock_t dev_base_lock; /* Device list lock */ +extern struct net_device *dev_base; /* All devices */ +extern rwlock_t dev_base_lock; /* Device list lock */ extern int netdev_boot_setup_add(char *name, struct ifmap *map); extern int netdev_boot_setup_check(struct net_device *dev); @@ -447,8 +412,10 @@ extern int dev_close(struct net_device *dev); extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern int unregister_netdevice(struct net_device *dev); -//extern int register_netdevice_notifier(struct notifier_block *nb); -//extern int 
unregister_netdevice_notifier(struct notifier_block *nb); +extern void dev_shutdown(struct net_device *dev); +extern void dev_activate(struct net_device *dev); +extern void dev_deactivate(struct net_device *dev); +extern void dev_init_scheduler(struct net_device *dev); extern int dev_new_index(void); extern struct net_device *dev_get_by_index(int ifindex); extern struct net_device *__dev_get_by_index(int ifindex); @@ -461,17 +428,11 @@ static inline int unregister_gifconf(unsigned int family) return register_gifconf(family, 0); } -/* - * Incoming packets are placed on per-cpu queues so that - * no locking is needed. - */ +extern struct tasklet_struct net_tx_tasklet; + struct softnet_data { - int throttle; - int cng_level; - int avg_blog; - struct sk_buff_head input_pkt_queue; struct net_device *output_queue; struct sk_buff *completion_queue; } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -490,7 +451,7 @@ static inline void __netif_schedule(struct net_device *dev) local_irq_save(flags); dev->next_sched = softnet_data[cpu].output_queue; softnet_data[cpu].output_queue = dev; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + tasklet_schedule(&net_tx_tasklet); local_irq_restore(flags); } } @@ -533,13 +494,13 @@ static inline int netif_running(struct net_device *dev) static inline void dev_kfree_skb_irq(struct sk_buff *skb) { if (atomic_dec_and_test(&skb->users)) { - int cpu =smp_processor_id(); + int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); skb->next = softnet_data[cpu].completion_queue; softnet_data[cpu].completion_queue = skb; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + tasklet_schedule(&net_tx_tasklet); local_irq_restore(flags); } } @@ -577,11 +538,6 @@ static inline int netif_rx_ni(struct sk_buff *skb) return err; } -static inline void dev_init_buffers(struct net_device *dev) -{ - /* WILL BE REMOVED IN 2.5.0 */ -} - extern int netdev_finish_unregister(struct net_device *dev); static inline void dev_put(struct net_device *dev) diff --git 
a/xen-2.4.16/include/xeno/pkt_sched.h b/xen-2.4.16/include/xeno/pkt_sched.h deleted file mode 100644 index 87b1a08e52..0000000000 --- a/xen-2.4.16/include/xeno/pkt_sched.h +++ /dev/null @@ -1,816 +0,0 @@ -#ifndef __NET_PKT_SCHED_H -#define __NET_PKT_SCHED_H - -#define PSCHED_GETTIMEOFDAY 1 -#define PSCHED_JIFFIES 2 -#define PSCHED_CPU 3 - -#define PSCHED_CLOCK_SOURCE PSCHED_JIFFIES - -#include -#include -//#include - -#ifdef CONFIG_X86_TSC -#include -#endif - -struct rtattr; -struct Qdisc; - -struct qdisc_walker -{ - int stop; - int skip; - int count; - int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); -}; - -struct Qdisc_class_ops -{ - /* Child qdisc manipulation */ - int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); - struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); - - /* Class manipulation routines */ - unsigned long (*get)(struct Qdisc *, u32 classid); - void (*put)(struct Qdisc *, unsigned long); - int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *); - int (*delete)(struct Qdisc *, unsigned long); - void (*walk)(struct Qdisc *, struct qdisc_walker * arg); - -#if 0 - /* Filter manipulation */ - struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long); - unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); - void (*unbind_tcf)(struct Qdisc *, unsigned long); -#endif -}; - -struct Qdisc_ops -{ - struct Qdisc_ops *next; - struct Qdisc_class_ops *cl_ops; - char id[IFNAMSIZ]; - int priv_size; - - int (*enqueue)(struct sk_buff *, struct Qdisc *); - struct sk_buff * (*dequeue)(struct Qdisc *); - int (*requeue)(struct sk_buff *, struct Qdisc *); - int (*drop)(struct Qdisc *); - - int (*init)(struct Qdisc *, struct rtattr *arg); - void (*reset)(struct Qdisc *); - void (*destroy)(struct Qdisc *); - int (*change)(struct Qdisc *, struct rtattr *arg); - - int (*dump)(struct Qdisc *, struct sk_buff *); -}; - -extern rwlock_t qdisc_tree_lock; - -struct Qdisc -{ - int 
(*enqueue)(struct sk_buff *skb, struct Qdisc *dev); - struct sk_buff * (*dequeue)(struct Qdisc *dev); - unsigned flags; -#define TCQ_F_BUILTIN 1 -#define TCQ_F_THROTTLED 2 -#define TCQ_F_INGRES 4 - struct Qdisc_ops *ops; - struct Qdisc *next; - u32 handle; - atomic_t refcnt; - struct sk_buff_head q; - struct net_device *dev; - - //struct tc_stats stats; - int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); - - /* This field is deprecated, but it is still used by CBQ - * and it will live until better solution will be invented. - */ - struct Qdisc *__parent; - - char data[0]; -}; - -struct qdisc_rate_table -{ - //struct tc_ratespec rate; - u32 data[256]; - struct qdisc_rate_table *next; - int refcnt; -}; - -static inline void sch_tree_lock(struct Qdisc *q) -{ - write_lock(&qdisc_tree_lock); - spin_lock_bh(&q->dev->queue_lock); -} - -static inline void sch_tree_unlock(struct Qdisc *q) -{ - spin_unlock_bh(&q->dev->queue_lock); - write_unlock(&qdisc_tree_lock); -} - -#if 0 -static inline void tcf_tree_lock(struct tcf_proto *tp) -{ - write_lock(&qdisc_tree_lock); - spin_lock_bh(&tp->q->dev->queue_lock); -} - -static inline void tcf_tree_unlock(struct tcf_proto *tp) -{ - spin_unlock_bh(&tp->q->dev->queue_lock); - write_unlock(&qdisc_tree_lock); -} - -static inline unsigned long -cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl) -{ - unsigned long old_cl; - - tcf_tree_lock(tp); - old_cl = *clp; - *clp = cl; - tcf_tree_unlock(tp); - return old_cl; -} - -static inline unsigned long -__cls_set_class(unsigned long *clp, unsigned long cl) -{ - unsigned long old_cl; - - old_cl = *clp; - *clp = cl; - return old_cl; -} -#endif - - -/* - Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth - - Normal IP packet size ~ 512byte, hence: - - 0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for - 10Mbit ethernet. - - 10msec resolution -> <50Kbit/sec. 
- - The result: [34]86 is not good choice for QoS router :-( - - The things are not so bad, because we may use artifical - clock evaluated by integration of network data flow - in the most critical places. - - Note: we do not use fastgettimeofday. - The reason is that, when it is not the same thing as - gettimeofday, it returns invalid timestamp, which is - not updated, when net_bh is active. - - So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums - with rtdsc. And PSCHED_JIFFIES on all other architectures, including [34]86 - and pentiums without rtdsc. - You can use PSCHED_GETTIMEOFDAY on another architectures, - which have fast and precise clock source, but it is too expensive. - */ - -/* General note about internal clock. - - Any clock source returns time intervals, measured in units - close to 1usec. With source PSCHED_GETTIMEOFDAY it is precisely - microseconds, otherwise something close but different chosen to minimize - arithmetic cost. Ratio usec/internal untis in form nominator/denominator - may be read from /proc/net/psched. 
- */ - - -#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY - -typedef struct timeval psched_time_t; -typedef long psched_tdiff_t; - -#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) -#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) - -#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff); - -#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */ - -#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2 - -typedef u64 psched_time_t; -typedef long psched_tdiff_t; - -extern psched_time_t psched_time_base; - -#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES - -#if HZ == 100 -#define PSCHED_JSCALE 13 -#elif HZ == 1024 -#define PSCHED_JSCALE 10 -#else -#define PSCHED_JSCALE 0 -#endif - -#define PSCHED_EXPORTLIST_2 - -#if ~0UL == 0xFFFFFFFF - -#define PSCHED_WATCHER unsigned long - -extern PSCHED_WATCHER psched_time_mark; - -#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<>PSCHED_JSCALE) - -#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU - -extern psched_tdiff_t psched_clock_per_hz; -extern int psched_clock_scale; - -#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \ - EXPORT_SYMBOL(psched_clock_scale); - -#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) - -#ifdef CONFIG_X86_TSC - -#define PSCHED_GET_TIME(stamp) \ -({ u64 __cur; \ - rdtscll(__cur); \ - (stamp) = __cur>>psched_clock_scale; \ -}) - -#define PSCHED_EXPORTLIST_1 - -#elif defined (__alpha__) - -#define PSCHED_WATCHER u32 - -extern PSCHED_WATCHER psched_time_mark; - -#define PSCHED_GET_TIME(stamp) \ -({ u32 __res; \ - __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \ - if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \ - psched_time_mark = __res; \ - (stamp) = (psched_time_base + __res)>>psched_clock_scale; \ -}) - -#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \ - EXPORT_SYMBOL(psched_time_mark); - -#else - -#error 
PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch. - -#endif /* ARCH */ - -#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */ - -#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */ - -#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY -#define PSCHED_TDIFF(tv1, tv2) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - if (__delta_sec) { \ - switch (__delta_sec) { \ - default: \ - __delta = 0; \ - case 2: \ - __delta += 1000000; \ - case 1: \ - __delta += 1000000; \ - } \ - } \ - __delta; \ -}) - -extern int psched_tod_diff(int delta_sec, int bound); - -#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - switch (__delta_sec) { \ - default: \ - __delta = psched_tod_diff(__delta_sec, bound); guard; break; \ - case 2: \ - __delta += 1000000; \ - case 1: \ - __delta += 1000000; \ - case 0: ; \ - } \ - __delta; \ -}) - -#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ - (tv1).tv_sec <= (tv2).tv_sec) || \ - (tv1).tv_sec < (tv2).tv_sec) - -#define PSCHED_TADD2(tv, delta, tv_res) \ -({ \ - int __delta = (tv).tv_usec + (delta); \ - (tv_res).tv_sec = (tv).tv_sec; \ - if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \ - (tv_res).tv_usec = __delta; \ -}) - -#define PSCHED_TADD(tv, delta) \ -({ \ - (tv).tv_usec += (delta); \ - if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \ - (tv).tv_usec -= 1000000; } \ -}) - -/* Set/check that time is in the "past perfect"; - it depends on concrete representation of system time - */ - -#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) -#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) - -#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) - -#else - -#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) -#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ -({ \ - long __delta = (tv1) - (tv2); \ - if ( __delta > 
(bound)) { __delta = (bound); guard; } \ - __delta; \ -}) - - -#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) -#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) -#define PSCHED_TADD(tv, delta) ((tv) += (delta)) -#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) -#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) -#define PSCHED_AUDIT_TDIFF(t) - -#endif - -struct tcf_police -{ - struct tcf_police *next; - int refcnt; - u32 index; - - int action; - int result; - u32 ewma_rate; - u32 burst; - u32 mtu; - - u32 toks; - u32 ptoks; - psched_time_t t_c; - spinlock_t lock; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; - - //struct tc_stats stats; -}; - -//extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st); -extern void tcf_police_destroy(struct tcf_police *p); -extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est); -extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); -extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); - -static inline void tcf_police_release(struct tcf_police *p) -{ - if (p && --p->refcnt == 0) - tcf_police_destroy(p); -} - -extern struct Qdisc noop_qdisc; -extern struct Qdisc_ops noop_qdisc_ops; -extern struct Qdisc_ops pfifo_qdisc_ops; -extern struct Qdisc_ops bfifo_qdisc_ops; - -int register_qdisc(struct Qdisc_ops *qops); -int unregister_qdisc(struct Qdisc_ops *qops); -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); -struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); -void dev_init_scheduler(struct net_device *dev); -void dev_shutdown(struct net_device *dev); -void dev_activate(struct net_device *dev); -void dev_deactivate(struct net_device *dev); -void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops); -//int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt); -//void 
qdisc_kill_estimator(struct tc_stats *stats); -//struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); -void qdisc_put_rtab(struct qdisc_rate_table *tab); -int teql_init(void); -int tc_filter_init(void); -int pktsched_init(void); - -extern int qdisc_restart(struct net_device *dev); - -static inline void qdisc_run(struct net_device *dev) -{ - while (!netif_queue_stopped(dev) && - qdisc_restart(dev)<0) - /* NOTHING */; -} - -/* Calculate maximal size of packet seen by hard_start_xmit - routine of this device. - */ -static inline unsigned psched_mtu(struct net_device *dev) -{ - unsigned mtu = dev->mtu; - return dev->hard_header ? mtu + dev->hard_header_len : mtu; -} - - -/****************************************************************************** - * XXXXXXXXX Rest of this file is real linux/pkt_sched.h XXXXX - */ - - -/* Logical priority bands not depending on specific packet scheduler. - Every scheduler will map them to real traffic classes, if it has - no more precise mechanism to classify packets. - - These numbers have no special meaning, though their coincidence - with obsolete IPv6 values is not occasional :-). New IPv6 drafts - preferred full anarchy inspired by diffserv group. - - Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy - class, actually, as rule it will be handled with more care than - filler or even bulk. - */ - -#define TC_PRIO_BESTEFFORT 0 -#define TC_PRIO_FILLER 1 -#define TC_PRIO_BULK 2 -#define TC_PRIO_INTERACTIVE_BULK 4 -#define TC_PRIO_INTERACTIVE 6 -#define TC_PRIO_CONTROL 7 - -#define TC_PRIO_MAX 15 - -/* Generic queue statistics, available for all the elements. - Particular schedulers may have also their private records. 
- */ - -struct tc_stats -{ - __u64 bytes; /* NUmber of enqueues bytes */ - __u32 packets; /* Number of enqueued packets */ - __u32 drops; /* Packets dropped because of lack of resources */ - __u32 overlimits; /* Number of throttle events when this - * flow goes out of allocated bandwidth */ - __u32 bps; /* Current flow byte rate */ - __u32 pps; /* Current flow packet rate */ - __u32 qlen; - __u32 backlog; -#ifdef __KERNEL__ - spinlock_t *lock; -#endif -}; - -struct tc_estimator -{ - char interval; - unsigned char ewma_log; -}; - -/* "Handles" - --------- - - All the traffic control objects have 32bit identifiers, or "handles". - - They can be considered as opaque numbers from user API viewpoint, - but actually they always consist of two fields: major and - minor numbers, which are interpreted by kernel specially, - that may be used by applications, though not recommended. - - F.e. qdisc handles always have minor number equal to zero, - classes (or flows) have major equal to parent qdisc major, and - minor uniquely identifying class inside qdisc. 
- - Macros to manipulate handles: - */ - -#define TC_H_MAJ_MASK (0xFFFF0000U) -#define TC_H_MIN_MASK (0x0000FFFFU) -#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) -#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) -#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) - -#define TC_H_UNSPEC (0U) -#define TC_H_ROOT (0xFFFFFFFFU) -#define TC_H_INGRESS (0xFFFFFFF1U) - -struct tc_ratespec -{ - unsigned char cell_log; - unsigned char __reserved; - unsigned short feature; - short addend; - unsigned short mpu; - __u32 rate; -}; - -/* FIFO section */ - -struct tc_fifo_qopt -{ - __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ -}; - -/* PRIO section */ - -#define TCQ_PRIO_BANDS 16 - -struct tc_prio_qopt -{ - int bands; /* Number of bands */ - __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ -}; - -/* CSZ section */ - -struct tc_csz_qopt -{ - int flows; /* Maximal number of guaranteed flows */ - unsigned char R_log; /* Fixed point position for round number */ - unsigned char delta_log; /* Log of maximal managed time interval */ - __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> CSZ band */ -}; - -struct tc_csz_copt -{ - struct tc_ratespec slice; - struct tc_ratespec rate; - struct tc_ratespec peakrate; - __u32 limit; - __u32 buffer; - __u32 mtu; -}; - -enum -{ - TCA_CSZ_UNSPEC, - TCA_CSZ_PARMS, - TCA_CSZ_RTAB, - TCA_CSZ_PTAB, -}; - -/* TBF section */ - -struct tc_tbf_qopt -{ - struct tc_ratespec rate; - struct tc_ratespec peakrate; - __u32 limit; - __u32 buffer; - __u32 mtu; -}; - -enum -{ - TCA_TBF_UNSPEC, - TCA_TBF_PARMS, - TCA_TBF_RTAB, - TCA_TBF_PTAB, -}; - - -/* TEQL section */ - -/* TEQL does not require any parameters */ - -/* SFQ section */ - -struct tc_sfq_qopt -{ - unsigned quantum; /* Bytes per round allocated to flow */ - int perturb_period; /* Period of hash perturbation */ - __u32 limit; /* Maximal packets in queue */ - unsigned divisor; /* Hash divisor */ - unsigned flows; /* Maximal number of flows */ -}; - 
-/* - * NOTE: limit, divisor and flows are hardwired to code at the moment. - * - * limit=flows=128, divisor=1024; - * - * The only reason for this is efficiency, it is possible - * to change these parameters in compile time. - */ - -/* RED section */ - -enum -{ - TCA_RED_UNSPEC, - TCA_RED_PARMS, - TCA_RED_STAB, -}; - -struct tc_red_qopt -{ - __u32 limit; /* HARD maximal queue length (bytes) */ - __u32 qth_min; /* Min average length threshold (bytes) */ - __u32 qth_max; /* Max average length threshold (bytes) */ - unsigned char Wlog; /* log(W) */ - unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ - unsigned char Scell_log; /* cell size for idle damping */ - unsigned char flags; -#define TC_RED_ECN 1 -}; - -struct tc_red_xstats -{ - __u32 early; /* Early drops */ - __u32 pdrop; /* Drops due to queue limits */ - __u32 other; /* Drops due to drop() calls */ - __u32 marked; /* Marked packets */ -}; - -/* GRED section */ - -#define MAX_DPs 16 - -enum -{ - TCA_GRED_UNSPEC, - TCA_GRED_PARMS, - TCA_GRED_STAB, - TCA_GRED_DPS, -}; - -#define TCA_SET_OFF TCA_GRED_PARMS -struct tc_gred_qopt -{ - __u32 limit; /* HARD maximal queue length (bytes) -*/ - __u32 qth_min; /* Min average length threshold (bytes) -*/ - __u32 qth_max; /* Max average length threshold (bytes) -*/ - __u32 DP; /* upto 2^32 DPs */ - __u32 backlog; - __u32 qave; - __u32 forced; - __u32 early; - __u32 other; - __u32 pdrop; - - unsigned char Wlog; /* log(W) */ - unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ - unsigned char Scell_log; /* cell size for idle damping */ - __u8 prio; /* prio of this VQ */ - __u32 packets; - __u32 bytesin; -}; -/* gred setup */ -struct tc_gred_sopt -{ - __u32 DPs; - __u32 def_DP; - __u8 grio; -}; - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt -{ - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned 
char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt -{ - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl -{ - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u32 penalty; -}; - -struct tc_cbq_police -{ - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt -{ - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats -{ - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum -{ - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, -}; - -#define TCA_CBQ_MAX TCA_CBQ_POLICE - -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE -}; - -#define TCA_DSMARK_MAX TCA_DSMARK_VALUE - -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE /* VC state (ATM_VS_*; for output only) */ -}; - -#define TCA_ATM_MAX TCA_ATM_STATE - -#endif diff --git a/xen-2.4.16/include/xeno/skbuff.h b/xen-2.4.16/include/xeno/skbuff.h index dd6257b370..5be56a7cc7 100644 --- a/xen-2.4.16/include/xeno/skbuff.h +++ b/xen-2.4.16/include/xeno/skbuff.h @@ -16,8 +16,6 @@ #include 
#include -//#include -//#include #include #include #include @@ -26,7 +24,6 @@ #include #include #include -//#include // vif special values. #define VIF_PHYSICAL_INTERFACE -1 @@ -144,8 +141,6 @@ struct sk_buff { struct sk_buff * prev; /* Previous buffer in list */ struct sk_buff_head * list; /* List we are on */ - struct sock *sk; /* Socket we are owned by */ - struct timeval stamp; /* Time we arrived */ struct net_device *dev; /* Device we arrived on/are leaving by */ /* Transport layer header */ @@ -177,66 +172,25 @@ struct sk_buff { unsigned char *raw; } mac; -// struct dst_entry *dst; - - /* - * This is the control buffer. It is free to use for every - * layer. Please put your private variables there. If you - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. - */ - char cb[48]; - unsigned int len; /* Length of actual data */ unsigned int data_len; unsigned int csum; /* Checksum */ - unsigned char __unused, /* Dead field, may be reused */ + unsigned char skb_type, cloned, /* head may be cloned (check refcnt to be sure) */ pkt_type, /* Packet class */ ip_summed; /* Driver fed us an IP checksum */ - __u32 priority; /* Packet queueing priority */ atomic_t users; /* User count - see datagram.c,tcp.c */ unsigned short protocol; /* Packet protocol from driver. */ - unsigned short security; /* Security level of packet */ - unsigned int truesize; /* Buffer size */ - unsigned char *head; /* Head of buffer */ unsigned char *data; /* Data head pointer */ unsigned char *tail; /* Tail pointer */ unsigned char *end; /* End pointer */ void (*destructor)(struct sk_buff *); /* Destruct function */ - - unsigned int skb_type; /* SKB_NORMAL or SKB_ZERO_COPY */ struct pfn_info *pf; /* record of physical pf address for freeing */ int src_vif; /* vif we came from */ int dst_vif; /* vif we are bound for */ struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. 
*/ - - - - -#ifdef CONFIG_NETFILTER - /* Can be used for communication between hooks. */ - unsigned long nfmark; - /* Cache info */ - __u32 nfcache; - /* Associated connection, if any */ - struct nf_ct_info *nfct; -#ifdef CONFIG_NETFILTER_DEBUG - unsigned int nf_debug; -#endif -#endif /*CONFIG_NETFILTER*/ - -#if defined(CONFIG_HIPPI) - union{ - __u32 ifield; - } private; -#endif - -#ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ -#endif }; #define SK_WMEM_MAX 65535 @@ -1000,7 +954,6 @@ static inline void skb_orphan(struct sk_buff *skb) if (skb->destructor) skb->destructor(skb); skb->destructor = NULL; - skb->sk = NULL; } /** @@ -1130,10 +1083,8 @@ static inline void kunmap_skb_frag(void *vaddr) skb=skb->next) -extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); -extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); diff --git a/xen-2.4.16/include/xeno/sockios.h b/xen-2.4.16/include/xeno/sockios.h index 81f99a124c..a0ad8100bf 100644 --- a/xen-2.4.16/include/xeno/sockios.h +++ b/xen-2.4.16/include/xeno/sockios.h @@ -69,9 +69,6 @@ #define SIOCGIFBR 0x8940 /* Bridging support */ #define SIOCSIFBR 0x8941 /* Set bridging options */ -#define SIOCGIFTXQLEN 0x8942 /* Get the tx queue length */ -#define SIOCSIFTXQLEN 0x8943 /* Set the tx queue length */ - #define SIOCGIFDIVERT 0x8944 /* Frame diversion support */ #define SIOCSIFDIVERT 0x8945 /* Set frame diversion options */ diff --git a/xen-2.4.16/include/xeno/vif.h b/xen-2.4.16/include/xeno/vif.h index 4af4f24435..9bf5e7686e 100644 --- 
a/xen-2.4.16/include/xeno/vif.h +++ b/xen-2.4.16/include/xeno/vif.h @@ -32,10 +32,17 @@ typedef struct rx_shadow_entry_st { unsigned long flush_count; } rx_shadow_entry_t; +typedef struct tx_shadow_entry_st { + unsigned long addr; + unsigned long size; + int status; +} tx_shadow_entry_t; + typedef struct net_shadow_ring_st { rx_shadow_entry_t *rx_ring; + tx_shadow_entry_t *tx_ring; unsigned int rx_prod, rx_cons, rx_idx; - unsigned int tx_cons; /* ahead of shared tx_cons */ + unsigned int tx_prod, tx_cons, tx_idx; } net_shadow_ring_t; typedef struct net_vif_st { diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c index 1d024cb805..a01ad51fa3 100644 --- a/xen-2.4.16/net/dev.c +++ b/xen-2.4.16/net/dev.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -60,12 +59,6 @@ struct net_device *the_dev = NULL; struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; -/********************************************************************************* - - Device Interface Subroutines - -**********************************************************************************/ - /** * __dev_get_by_name - find a device by its name * @name: name to find @@ -80,13 +73,13 @@ struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; struct net_device *__dev_get_by_name(const char *name) { - struct net_device *dev; + struct net_device *dev; - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (strncmp(dev->name, name, IFNAMSIZ) == 0) - return dev; - } - return NULL; + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (strncmp(dev->name, name, IFNAMSIZ) == 0) + return dev; + } + return NULL; } /** @@ -102,14 +95,14 @@ struct net_device *__dev_get_by_name(const char *name) struct net_device *dev_get_by_name(const char *name) { - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; + struct net_device *dev; + + 
read_lock(&dev_base_lock); + dev = __dev_get_by_name(name); + if (dev) + dev_hold(dev); + read_unlock(&dev_base_lock); + return dev; } /* @@ -134,12 +127,12 @@ struct net_device *dev_get_by_name(const char *name) int dev_get(const char *name) { - struct net_device *dev; + struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); - read_unlock(&dev_base_lock); - return dev != NULL; + read_lock(&dev_base_lock); + dev = __dev_get_by_name(name); + read_unlock(&dev_base_lock); + return dev != NULL; } /** @@ -155,13 +148,13 @@ int dev_get(const char *name) struct net_device * __dev_get_by_index(int ifindex) { - struct net_device *dev; + struct net_device *dev; - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (dev->ifindex == ifindex) - return dev; - } - return NULL; + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (dev->ifindex == ifindex) + return dev; + } + return NULL; } @@ -177,14 +170,14 @@ struct net_device * __dev_get_by_index(int ifindex) struct net_device * dev_get_by_index(int ifindex) { - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; + struct net_device *dev; + + read_lock(&dev_base_lock); + dev = __dev_get_by_index(ifindex); + if (dev) + dev_hold(dev); + read_unlock(&dev_base_lock); + return dev; } /** @@ -203,14 +196,14 @@ struct net_device * dev_get_by_index(int ifindex) struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) { - struct net_device *dev; - - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (dev->type == type && - memcmp(dev->dev_addr, ha, dev->addr_len) == 0) - return dev; - } - return NULL; + struct net_device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (dev->type == type && + memcmp(dev->dev_addr, ha, dev->addr_len) == 0) + return dev; + } + return NULL; } /** @@ -227,30 +220,30 @@ struct net_device *dev_getbyhwaddr(unsigned short 
type, char *ha) int dev_alloc_name(struct net_device *dev, const char *name) { - int i; - char buf[32]; - char *p; - - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters, or no "%" characters at all. - */ - p = strchr(name, '%'); - if (p && (p[1] != 'd' || strchr(p+2, '%'))) - return -EINVAL; - - /* - * If you need over 100 please also fix the algorithm... - */ - for (i = 0; i < 100; i++) { - snprintf(buf,sizeof(buf),name,i); - if (__dev_get_by_name(buf) == NULL) { - strcpy(dev->name, buf); - return i; - } - } - return -ENFILE; /* Over 100 of the things .. bail out! */ + int i; + char buf[32]; + char *p; + + /* + * Verify the string as this thing may have come from + * the user. There must be either one "%d" and no other "%" + * characters, or no "%" characters at all. + */ + p = strchr(name, '%'); + if (p && (p[1] != 'd' || strchr(p+2, '%'))) + return -EINVAL; + + /* + * If you need over 100 please also fix the algorithm... + */ + for (i = 0; i < 100; i++) { + snprintf(buf,sizeof(buf),name,i); + if (__dev_get_by_name(buf) == NULL) { + strcpy(dev->name, buf); + return i; + } + } + return -ENFILE; /* Over 100 of the things .. bail out! 
*/ } /** @@ -271,18 +264,18 @@ int dev_alloc_name(struct net_device *dev, const char *name) struct net_device *dev_alloc(const char *name, int *err) { - struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL); - if (dev == NULL) { - *err = -ENOBUFS; - return NULL; - } - memset(dev, 0, sizeof(struct net_device)); - *err = dev_alloc_name(dev, name); - if (*err < 0) { - kfree(dev); - return NULL; - } - return dev; + struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL); + if (dev == NULL) { + *err = -ENOBUFS; + return NULL; + } + memset(dev, 0, sizeof(struct net_device)); + *err = dev_alloc_name(dev, name); + if (*err < 0) { + kfree(dev); + return NULL; + } + return dev; } /** @@ -296,10 +289,10 @@ struct net_device *dev_alloc(const char *name, int *err) void netdev_state_change(struct net_device *dev) { - if (dev->flags&IFF_UP) { - notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); - } + if (dev->flags&IFF_UP) { + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + } } @@ -316,8 +309,8 @@ void netdev_state_change(struct net_device *dev) void dev_load(const char *name) { - if (!dev_get(name) && capable(CAP_SYS_MODULE)) - request_module(name); + if (!dev_get(name) && capable(CAP_SYS_MODULE)) + request_module(name); } #else @@ -328,9 +321,10 @@ extern inline void dev_load(const char *unused){;} static int default_rebuild_header(struct sk_buff *skb) { - printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!"); - kfree_skb(skb); - return 1; + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", + skb->dev ? skb->dev->name : "NULL!!!"); + kfree_skb(skb); + return 1; } /** @@ -348,63 +342,63 @@ static int default_rebuild_header(struct sk_buff *skb) int dev_open(struct net_device *dev) { - int ret = 0; - - /* - * Is it already up? - */ - - if (dev->flags&IFF_UP) - return 0; - - /* - * Is it even present? 
- */ - if (!netif_device_present(dev)) - return -ENODEV; - - /* - * Call device private open method - */ - if (try_inc_mod_count(dev->owner)) { - if (dev->open) { - ret = dev->open(dev); - if (ret != 0 && dev->owner) - __MOD_DEC_USE_COUNT(dev->owner); - } - } else { - ret = -ENODEV; - } - - /* - * If it went open OK then: - */ + int ret = 0; + + /* + * Is it already up? + */ + + if (dev->flags&IFF_UP) + return 0; + + /* + * Is it even present? + */ + if (!netif_device_present(dev)) + return -ENODEV; + + /* + * Call device private open method + */ + if (try_inc_mod_count(dev->owner)) { + if (dev->open) { + ret = dev->open(dev); + if (ret != 0 && dev->owner) + __MOD_DEC_USE_COUNT(dev->owner); + } + } else { + ret = -ENODEV; + } + + /* + * If it went open OK then: + */ - if (ret == 0) - { - /* - * Set the flags. - */ - dev->flags |= IFF_UP; - - set_bit(__LINK_STATE_START, &dev->state); - - /* - * Initialize multicasting status - */ - dev_mc_upload(dev); - - /* - * Wakeup transmit queue engine - */ - dev_activate(dev); - - /* - * ... and announce new interface. - */ - notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - } - return(ret); + if (ret == 0) + { + /* + * Set the flags. + */ + dev->flags |= IFF_UP; + + set_bit(__LINK_STATE_START, &dev->state); + + /* + * Initialize multicasting status + */ + dev_mc_upload(dev); + + /* + * Wakeup transmit queue engine + */ + dev_activate(dev); + + /* + * ... and announce new interface. + */ + notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + } + return(ret); } @@ -420,48 +414,48 @@ int dev_open(struct net_device *dev) int dev_close(struct net_device *dev) { - if (!(dev->flags&IFF_UP)) - return 0; - - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); - - dev_deactivate(dev); - - clear_bit(__LINK_STATE_START, &dev->state); - - /* - * Call the device specific close. This cannot fail. 
- * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ + if (!(dev->flags&IFF_UP)) + return 0; + + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + + dev_deactivate(dev); + + clear_bit(__LINK_STATE_START, &dev->state); + + /* + * Call the device specific close. This cannot fail. + * Only if device is UP + * + * We allow it to be called even after a DETACH hot-plug + * event. + */ - if (dev->stop) - dev->stop(dev); + if (dev->stop) + dev->stop(dev); - /* - * Device is now down. - */ + /* + * Device is now down. + */ - dev->flags &= ~IFF_UP; + dev->flags &= ~IFF_UP; - /* - * Tell people we are down - */ - notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + /* + * Tell people we are down + */ + notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); - /* - * Drop the module refcount - */ - if (dev->owner) - __MOD_DEC_USE_COUNT(dev->owner); + /* + * Drop the module refcount + */ + if (dev->owner) + __MOD_DEC_USE_COUNT(dev->owner); - return(0); + return(0); } @@ -474,113 +468,77 @@ int dev_close(struct net_device *dev) static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { - int i; + int i; - if (dev->features&NETIF_F_HIGHDMA) - return 0; + if (dev->features&NETIF_F_HIGHDMA) + return 0; - for (i=0; inr_frags; i++) - if (skb_shinfo(skb)->frags[i].page >= highmem_start_page) - return 1; + for (i=0; inr_frags; i++) + if (skb_shinfo(skb)->frags[i].page >= highmem_start_page) + return 1; - return 0; + return 0; } #else #define illegal_highdma(dev, skb) (0) #endif -/** - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit +/* + * dev_queue_xmit - transmit a buffer + * @skb: buffer to transmit * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling this - * function. 
The function can be called from an interrupt. + * Queue a buffer for transmission to a network device. The caller must + * have set the device and priority and built the buffer before calling this + * function. The function can be called from an interrupt. * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. + * A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. */ int dev_queue_xmit(struct sk_buff *skb) { - struct net_device *dev = skb->dev; - struct Qdisc *q; + struct net_device *dev = skb->dev; - if (!(dev->features&NETIF_F_SG)) - { - printk("NIC doesn't do scatter-gather!\n"); - BUG(); - } + if (!(dev->features&NETIF_F_SG)) + { + printk("NIC doesn't do scatter-gather!\n"); + BUG(); + } - if (skb_shinfo(skb)->frag_list && - !(dev->features&NETIF_F_FRAGLIST) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return -ENOMEM; - } - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return -ENOMEM; - } - - /* Grab device queue */ - spin_lock_bh(&dev->queue_lock); - q = dev->qdisc; - if (q->enqueue) { - int ret = q->enqueue(skb, q); - - qdisc_run(dev); - - spin_unlock_bh(&dev->queue_lock); - return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret; - } - - /* The device has no queue. Common case for software devices: - loopback, all the sorts of tunnels... - - Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. loopback and IP tunnels are clean ignoring statistics counters.) 
- However, it is possible, that they rely on protection - made by us here. - - Check this and shot the lock. It is not prone from deadlocks. - Either shot noqueue qdisc, it is even simpler 8) - */ - if (dev->flags&IFF_UP) { - int cpu = smp_processor_id(); - - if (dev->xmit_lock_owner != cpu) { - spin_unlock(&dev->queue_lock); - spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = cpu; - - if (!netif_queue_stopped(dev)) { - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - return 0; - } - } - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - kfree_skb(skb); - return -ENETDOWN; - } else { - /* Recursion is detected! It is possible, unfortunately */ - } - } - spin_unlock_bh(&dev->queue_lock); - - kfree_skb(skb); - return -ENETDOWN; + if (skb_shinfo(skb)->frag_list && + !(dev->features&NETIF_F_FRAGLIST) && + skb_linearize(skb, GFP_ATOMIC) != 0) { + kfree_skb(skb); + return -ENOMEM; + } + + spin_lock_bh(&dev->queue_lock); + if (dev->flags&IFF_UP) { + int cpu = smp_processor_id(); + + if (dev->xmit_lock_owner != cpu) { + spin_unlock(&dev->queue_lock); + spin_lock(&dev->xmit_lock); + dev->xmit_lock_owner = cpu; + + if (!netif_queue_stopped(dev)) { + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->xmit_lock); + return 0; + } + } + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->xmit_lock); + kfree_skb(skb); + return -ENETDOWN; + } + } + spin_unlock_bh(&dev->queue_lock); + + kfree_skb(skb); + return -ENETDOWN; } @@ -609,133 +567,114 @@ spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; static struct { - void (*stimul)(struct net_device *); - struct net_device *dev; + void (*stimul)(struct net_device *); + struct net_device *dev; } netdev_fc_slots[BITS_PER_LONG]; -int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)) +int netdev_register_fc(struct net_device *dev, + void (*stimul)(struct net_device *dev)) { - int bit = 0; - 
unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (netdev_fc_mask != ~0UL) { - bit = ffz(netdev_fc_mask); - netdev_fc_slots[bit].stimul = stimul; - netdev_fc_slots[bit].dev = dev; - set_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); - } - spin_unlock_irqrestore(&netdev_fc_lock, flags); - return bit; + int bit = 0; + unsigned long flags; + + spin_lock_irqsave(&netdev_fc_lock, flags); + if (netdev_fc_mask != ~0UL) { + bit = ffz(netdev_fc_mask); + netdev_fc_slots[bit].stimul = stimul; + netdev_fc_slots[bit].dev = dev; + set_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + spin_unlock_irqrestore(&netdev_fc_lock, flags); + return bit; } void netdev_unregister_fc(int bit) { - unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (bit > 0) { - netdev_fc_slots[bit].stimul = NULL; - netdev_fc_slots[bit].dev = NULL; - clear_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); - } - spin_unlock_irqrestore(&netdev_fc_lock, flags); + unsigned long flags; + + spin_lock_irqsave(&netdev_fc_lock, flags); + if (bit > 0) { + netdev_fc_slots[bit].stimul = NULL; + netdev_fc_slots[bit].dev = NULL; + clear_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + spin_unlock_irqrestore(&netdev_fc_lock, flags); } static void netdev_wakeup(void) { - unsigned long xoff; - - spin_lock(&netdev_fc_lock); - xoff = netdev_fc_xoff; - netdev_fc_xoff = 0; - while (xoff) { - int i = ffz(~xoff); - xoff &= ~(1<> 1)+ (blog >> 1); - - if (avg_blog > mod_cong) { - /* Above moderate congestion levels. 
*/ - softnet_data[cpu].cng_level = NET_RX_CN_HIGH; - } else if (avg_blog > lo_cong) { - softnet_data[cpu].cng_level = NET_RX_CN_MOD; - } else if (avg_blog > no_cong) - softnet_data[cpu].cng_level = NET_RX_CN_LOW; - else /* no congestion */ - softnet_data[cpu].cng_level = NET_RX_SUCCESS; - - softnet_data[cpu].avg_blog = avg_blog; -} - void deliver_packet(struct sk_buff *skb, net_vif_t *vif) { - net_shadow_ring_t *shadow_ring; - rx_shadow_entry_t *rx; - unsigned long *g_pte; - struct pfn_info *g_pfn, *h_pfn; - unsigned int i; - - memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN); - if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) - { - memset(skb->nh.raw + 18, 0, ETH_ALEN); - } - shadow_ring = vif->shadow_ring; + net_shadow_ring_t *shadow_ring; + rx_shadow_entry_t *rx; + unsigned long *g_pte; + struct pfn_info *g_pfn, *h_pfn; + unsigned int i; - if ( (i = shadow_ring->rx_cons) == shadow_ring->rx_prod ) - { - return; - } + memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN); + if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) + { + memset(skb->nh.raw + 18, 0, ETH_ALEN); + } + shadow_ring = vif->shadow_ring; - if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK ) - { - DPRINTK("Bad buffer in deliver_packet()\n"); - shadow_ring->rx_cons = RX_RING_INC(i); - return; - } + if ( (i = shadow_ring->rx_cons) == shadow_ring->rx_prod ) + { + return; + } + + if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK ) + { + DPRINTK("Bad buffer in deliver_packet()\n"); + shadow_ring->rx_cons = RX_RING_INC(i); + return; + } - rx = shadow_ring->rx_ring + i; - if ( (skb->len + ETH_HLEN) < rx->size ) - rx->size = skb->len + ETH_HLEN; + rx = shadow_ring->rx_ring + i; + if ( (skb->len + ETH_HLEN) < rx->size ) + rx->size = skb->len + ETH_HLEN; - g_pte = map_domain_mem(rx->addr); + g_pte = map_domain_mem(rx->addr); - g_pfn = frame_table + (*g_pte >> PAGE_SHIFT); - h_pfn = skb->pf; + g_pfn = frame_table + (*g_pte >> PAGE_SHIFT); + h_pfn = skb->pf; - h_pfn->tot_count = h_pfn->type_count = 1; 
- g_pfn->tot_count = g_pfn->type_count = 0; - h_pfn->flags = g_pfn->flags & (~PG_type_mask); + h_pfn->tot_count = h_pfn->type_count = 1; + g_pfn->tot_count = g_pfn->type_count = 0; + h_pfn->flags = g_pfn->flags & (~PG_type_mask); - if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page; - g_pfn->flags = 0; + if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page; + g_pfn->flags = 0; - /* Point the guest at the new machine frame. */ - machine_to_phys_mapping[h_pfn - frame_table] - = machine_to_phys_mapping[g_pfn - frame_table]; - *g_pte = (*g_pte & ~PAGE_MASK) - | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK); - *g_pte |= _PAGE_PRESENT; + /* Point the guest at the new machine frame. */ + machine_to_phys_mapping[h_pfn - frame_table] + = machine_to_phys_mapping[g_pfn - frame_table]; + *g_pte = (*g_pte & ~PAGE_MASK) + | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK); + *g_pte |= _PAGE_PRESENT; - unmap_domain_mem(g_pte); + unmap_domain_mem(g_pte); - /* Our skbuff now points at the guest's old frame. */ - skb->pf = g_pfn; + /* Our skbuff now points at the guest's old frame. 
*/ + skb->pf = g_pfn; - shadow_ring->rx_cons = RX_RING_INC(i); + shadow_ring->rx_cons = RX_RING_INC(i); } /* Deliver skb to an old protocol, which is not threaded well @@ -763,211 +702,207 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) int netif_rx(struct sk_buff *skb) { #ifdef CONFIG_SMP - unsigned long cpu_mask; + unsigned long cpu_mask; #endif - struct task_struct *p; - int this_cpu = smp_processor_id(); - struct softnet_data *queue; - unsigned long flags; - net_vif_t *vif; + struct task_struct *p; + int this_cpu = smp_processor_id(); + struct softnet_data *queue; + unsigned long flags; + net_vif_t *vif; - local_irq_save(flags); + local_irq_save(flags); - if (skb->skb_type != SKB_ZERO_COPY) - BUG(); - - if (skb->stamp.tv_sec == 0) - get_fast_time(&skb->stamp); - - if ( (skb->data - skb->head) != (18 + ETH_HLEN) ) - BUG(); + ASSERT(skb->skb_type == SKB_ZERO_COPY); + ASSERT((skb->data - skb->head) == (18 + ETH_HLEN)); - skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); - /* - * remapping this address really screws up all the skb pointers. We - * need to map them all here sufficiently to get the packet - * demultiplexed. this remapping happens more than once in the code and - * is grim. It will be fixed in a later update -- drivers should be - * able to align the packet arbitrarily. - */ + /* + * remapping this address really screws up all the skb pointers. We + * need to map them all here sufficiently to get the packet + * demultiplexed. this remapping happens more than once in the code and + * is grim. It will be fixed in a later update -- drivers should be + * able to align the packet arbitrarily. + */ - skb->data = skb->head; - skb_reserve(skb,18); /* 18 is the 16 from dev_alloc_skb plus 2 for - IP header alignment. 
*/ - skb->mac.raw = skb->data; - skb->data += ETH_HLEN; - skb->nh.raw = skb->data; + skb->data = skb->head; + skb_reserve(skb,18); /* 18 is the 16 from dev_alloc_skb plus 2 for + IP header alignment. */ + skb->mac.raw = skb->data; + skb->data += ETH_HLEN; + skb->nh.raw = skb->data; - queue = &softnet_data[this_cpu]; + queue = &softnet_data[this_cpu]; - netdev_rx_stat[this_cpu].total++; + netdev_rx_stat[this_cpu].total++; - if ( skb->src_vif == VIF_UNKNOWN_INTERFACE ) - skb->src_vif = VIF_PHYSICAL_INTERFACE; + if ( skb->src_vif == VIF_UNKNOWN_INTERFACE ) + skb->src_vif = VIF_PHYSICAL_INTERFACE; - if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE ) - skb->dst_vif = __net_get_target_vif(skb->mac.raw, skb->len, skb->src_vif); + if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE ) + skb->dst_vif = __net_get_target_vif(skb->mac.raw, + skb->len, skb->src_vif); - if ( (vif = sys_vif_list[skb->dst_vif]) == NULL ) - goto drop; + if ( (vif = sys_vif_list[skb->dst_vif]) == NULL ) + goto drop; - /* - * This lock-and-walk of the task list isn't really necessary, and is - * an artifact of the old code. The vif contains a pointer to the skb - * list we are going to queue the packet in, so the lock and the inner - * loop could be removed. The argument against this is a possible race - * in which a domain is killed as packets are being delivered to it. - * This would result in the dest vif vanishing before we can deliver to - * it. - */ + /* + * This lock-and-walk of the task list isn't really necessary, and is + * an artifact of the old code. The vif contains a pointer to the skb + * list we are going to queue the packet in, so the lock and the inner + * loop could be removed. The argument against this is a possible race + * in which a domain is killed as packets are being delivered to it. + * This would result in the dest vif vanishing before we can deliver to + * it. 
+ */ - if ( skb->dst_vif >= VIF_PHYSICAL_INTERFACE ) - { - read_lock(&tasklist_lock); - p = &idle0_task; - do { - if ( p->domain != vif->domain ) continue; - if ( vif->skb_list.qlen > 100 ) break; - deliver_packet(skb, vif); - cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX); - read_unlock(&tasklist_lock); - goto found; - } - while ( (p = p->next_task) != &idle0_task ); - read_unlock(&tasklist_lock); - goto drop; + if ( skb->dst_vif >= VIF_PHYSICAL_INTERFACE ) + { + read_lock(&tasklist_lock); + p = &idle0_task; + do { + if ( p->domain != vif->domain ) continue; + if ( vif->skb_list.qlen > 100 ) break; + deliver_packet(skb, vif); + cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX); + read_unlock(&tasklist_lock); + goto found; } + while ( (p = p->next_task) != &idle0_task ); + read_unlock(&tasklist_lock); + goto drop; + } -drop: - netdev_rx_stat[this_cpu].dropped++; - unmap_domain_mem(skb->head); - kfree_skb(skb); - local_irq_restore(flags); - return NET_RX_DROP; - -found: - unmap_domain_mem(skb->head); - skb->head = skb->data = skb->tail = (void *)0xdeadbeef; - kfree_skb(skb); - hyp_event_notify(cpu_mask); - local_irq_restore(flags); - return 0; + drop: + netdev_rx_stat[this_cpu].dropped++; + unmap_domain_mem(skb->head); + kfree_skb(skb); + local_irq_restore(flags); + return NET_RX_DROP; + + found: + unmap_domain_mem(skb->head); + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; + kfree_skb(skb); + hyp_event_notify(cpu_mask); + local_irq_restore(flags); + return 0; } -static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) +static int deliver_to_old_ones(struct packet_type *pt, + struct sk_buff *skb, int last) { - static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; - int ret = NET_RX_DROP; + static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; + int ret = NET_RX_DROP; - if (!last) { - skb = skb_clone(skb, GFP_ATOMIC); - if (skb == NULL) - return ret; - } - if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { - 
kfree_skb(skb); - return ret; - } + if (!last) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb == NULL) + return ret; + } + if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { + kfree_skb(skb); + return ret; + } - /* The assumption (correct one) is that old protocols - did not depened on BHs different of NET_BH and TIMER_BH. - */ + /* The assumption (correct one) is that old protocols + did not depened on BHs different of NET_BH and TIMER_BH. + */ - /* Emulate NET_BH with special spinlock */ - spin_lock(&net_bh_lock); + /* Emulate NET_BH with special spinlock */ + spin_lock(&net_bh_lock); - /* Disable timers and wait for all timers completion */ - tasklet_disable(bh_task_vec+TIMER_BH); + /* Disable timers and wait for all timers completion */ + tasklet_disable(bh_task_vec+TIMER_BH); - ret = pt->func(skb, skb->dev, pt); + ret = pt->func(skb, skb->dev, pt); - tasklet_hi_enable(bh_task_vec+TIMER_BH); - spin_unlock(&net_bh_lock); - return ret; + tasklet_hi_enable(bh_task_vec+TIMER_BH); + spin_unlock(&net_bh_lock); + return ret; } -static void net_tx_action(struct softirq_action *h) +static void net_tx_action(unsigned long unused) { - int cpu = smp_processor_id(); - - if (softnet_data[cpu].completion_queue) { - struct sk_buff *clist; - - local_irq_disable(); - clist = softnet_data[cpu].completion_queue; - softnet_data[cpu].completion_queue = NULL; - local_irq_enable(); - - while (clist != NULL) { - struct sk_buff *skb = clist; - clist = clist->next; - - BUG_TRAP(atomic_read(&skb->users) == 0); - __kfree_skb(skb); - } - } - - if (softnet_data[cpu].output_queue) { - struct net_device *head; - - local_irq_disable(); - head = softnet_data[cpu].output_queue; - softnet_data[cpu].output_queue = NULL; - local_irq_enable(); - - while (head != NULL) { - struct net_device *dev = head; - head = head->next_sched; - - smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); - - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - 
spin_unlock(&dev->queue_lock); - } else { - netif_schedule(dev); - } - } - } -} + int cpu = smp_processor_id(); + + if (softnet_data[cpu].completion_queue) { + struct sk_buff *clist; + + local_irq_disable(); + clist = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = NULL; + local_irq_enable(); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + + BUG_TRAP(atomic_read(&skb->users) == 0); + __kfree_skb(skb); + } + } + + if (softnet_data[cpu].output_queue) { + struct net_device *head; + local_irq_disable(); + head = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = NULL; + local_irq_enable(); + + while (head != NULL) { + struct net_device *dev = head; + head = head->next_sched; + + smp_mb__before_clear_bit(); + clear_bit(__LINK_STATE_SCHED, &dev->state); + + if (spin_trylock(&dev->queue_lock)) { + /*qdisc_run(dev); XXX KAF */ + spin_unlock(&dev->queue_lock); + } else { + netif_schedule(dev); + } + } + } +} +DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL; #endif static __inline__ int handle_bridge(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev) { - int ret = NET_RX_DROP; - - if (pt_prev) { - if (!pt_prev->data) - ret = deliver_to_old_ones(pt_prev, skb, 0); - else { - atomic_inc(&skb->users); - ret = pt_prev->func(skb, skb->dev, pt_prev); - } - } + int ret = NET_RX_DROP; + + if (pt_prev) { + if (!pt_prev->data) + ret = deliver_to_old_ones(pt_prev, skb, 0); + else { + atomic_inc(&skb->users); + ret = pt_prev->func(skb, skb->dev, pt_prev); + } + } #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - br_handle_frame_hook(skb); + br_handle_frame_hook(skb); #endif - return ret; + return ret; } #ifdef CONFIG_NET_DIVERT static inline void handle_diverter(struct sk_buff *skb) { - /* if diversion is supported on device, then divert */ - if 
(skb->dev->divert && skb->dev->divert->divert) - divert_frame(skb); + /* if diversion is supported on device, then divert */ + if (skb->dev->divert && skb->dev->divert->divert) + divert_frame(skb); } #endif /* CONFIG_NET_DIVERT */ @@ -1031,29 +966,29 @@ void update_shared_ring(void) static int dev_ifname(struct ifreq *arg) { - struct net_device *dev; - struct ifreq ifr; + struct net_device *dev; + struct ifreq ifr; - /* - * Fetch the caller's info block. - */ + /* + * Fetch the caller's info block. + */ - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); - if (!dev) { - read_unlock(&dev_base_lock); - return -ENODEV; - } - - strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + read_lock(&dev_base_lock); + dev = __dev_get_by_index(ifr.ifr_ifindex); + if (!dev) { + read_unlock(&dev_base_lock); + return -ENODEV; + } + + strcpy(ifr.ifr_name, dev->name); + read_unlock(&dev_base_lock); + + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; } @@ -1071,28 +1006,28 @@ static int dev_ifname(struct ifreq *arg) int netdev_set_master(struct net_device *slave, struct net_device *master) { - struct net_device *old = slave->master; + struct net_device *old = slave->master; - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } + if (master) { + if (old) + return -EBUSY; + dev_hold(master); + } - br_write_lock_bh(BR_NETPROTO_LOCK); - slave->master = master; - br_write_unlock_bh(BR_NETPROTO_LOCK); + br_write_lock_bh(BR_NETPROTO_LOCK); + slave->master = master; + br_write_unlock_bh(BR_NETPROTO_LOCK); - if (old) - dev_put(old); + if (old) + dev_put(old); - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; + if (master) + slave->flags |= IFF_SLAVE; + 
else + slave->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); + return 0; } /** @@ -1108,23 +1043,23 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) void dev_set_promiscuity(struct net_device *dev, int inc) { - unsigned short old_flags = dev->flags; + unsigned short old_flags = dev->flags; - dev->flags |= IFF_PROMISC; - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - if (dev->flags^old_flags) { + dev->flags |= IFF_PROMISC; + if ((dev->promiscuity += inc) == 0) + dev->flags &= ~IFF_PROMISC; + if (dev->flags^old_flags) { #ifdef CONFIG_NET_FASTROUTE - if (dev->flags&IFF_PROMISC) { - netdev_fastroute_obstacles++; - dev_clear_fastroute(dev); - } else - netdev_fastroute_obstacles--; + if (dev->flags&IFF_PROMISC) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(dev); + } else + netdev_fastroute_obstacles--; #endif - dev_mc_upload(dev); - printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); - } + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", + dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); + } } /** @@ -1141,73 +1076,74 @@ void dev_set_promiscuity(struct net_device *dev, int inc) void dev_set_allmulti(struct net_device *dev, int inc) { - unsigned short old_flags = dev->flags; + unsigned short old_flags = dev->flags; - dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; - if (dev->flags^old_flags) - dev_mc_upload(dev); + dev->flags |= IFF_ALLMULTI; + if ((dev->allmulti += inc) == 0) + dev->flags &= ~IFF_ALLMULTI; + if (dev->flags^old_flags) + dev_mc_upload(dev); } int dev_change_flags(struct net_device *dev, unsigned flags) { - int ret; - int old_flags = dev->flags; - - /* - * Set the flags on our device. 
- */ - - dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| - IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | - (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); - - /* - * Load in the correct multicast list now the flags have changed. - */ - - dev_mc_upload(dev); - - /* - * Have we downed the interface. We handle IFF_UP ourselves - * according to user attempts to set it, rather than blindly - * setting it. - */ - - ret = 0; - if ((old_flags^flags)&IFF_UP) /* Bit is different ? */ - { - ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); - - if (ret == 0) - dev_mc_upload(dev); - } - - if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) - notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); - - if ((flags^dev->gflags)&IFF_PROMISC) { - int inc = (flags&IFF_PROMISC) ? +1 : -1; - dev->gflags ^= IFF_PROMISC; - dev_set_promiscuity(dev, inc); - } - - /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI - is important. Some (broken) drivers set IFF_PROMISC, when - IFF_ALLMULTI is requested not asking us and not reporting. - */ - if ((flags^dev->gflags)&IFF_ALLMULTI) { - int inc = (flags&IFF_ALLMULTI) ? +1 : -1; - dev->gflags ^= IFF_ALLMULTI; - dev_set_allmulti(dev, inc); - } - - if (old_flags^dev->flags) - rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); - - return ret; + int ret; + int old_flags = dev->flags; + + /* + * Set the flags on our device. + */ + + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| + IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | + (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); + + /* + * Load in the correct multicast list now the flags have changed. + */ + + dev_mc_upload(dev); + + /* + * Have we downed the interface. We handle IFF_UP ourselves + * according to user attempts to set it, rather than blindly + * setting it. + */ + + ret = 0; + if ((old_flags^flags)&IFF_UP) /* Bit is different ? 
*/ + { + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); + + if (ret == 0) + dev_mc_upload(dev); + } + + if (dev->flags&IFF_UP && + ((old_flags^dev->flags)& + ~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + + if ((flags^dev->gflags)&IFF_PROMISC) { + int inc = (flags&IFF_PROMISC) ? +1 : -1; + dev->gflags ^= IFF_PROMISC; + dev_set_promiscuity(dev, inc); + } + + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI + is important. Some (broken) drivers set IFF_PROMISC, when + IFF_ALLMULTI is requested not asking us and not reporting. + */ + if ((flags^dev->gflags)&IFF_ALLMULTI) { + int inc = (flags&IFF_ALLMULTI) ? +1 : -1; + dev->gflags ^= IFF_ALLMULTI; + dev_set_allmulti(dev, inc); + } + + if (old_flags^dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + + return ret; } /* @@ -1216,190 +1152,180 @@ int dev_change_flags(struct net_device *dev, unsigned flags) static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) { - struct net_device *dev; - int err; - - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) - return -ENODEV; - - switch(cmd) - { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) - |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); - if (netif_running(dev) && netif_carrier_ok(dev)) - ifr->ifr_flags |= IFF_RUNNING; - return 0; - - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); + struct net_device *dev; + int err; + + if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + return -ENODEV; + + switch(cmd) + { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) + |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + if (netif_running(dev) && netif_carrier_ok(dev)) + ifr->ifr_flags |= IFF_RUNNING; + return 0; + + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, 
ifr->ifr_flags); - case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ - ifr->ifr_metric = 0; - return 0; + case SIOCGIFMETRIC: /* Get the metric on the interface */ + ifr->ifr_metric = 0; + return 0; - case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ - return -EOPNOTSUPP; + case SIOCSIFMETRIC: /* Set the metric on the interface */ + return -EOPNOTSUPP; - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; - case SIOCSIFMTU: /* Set the MTU of a device */ - if (ifr->ifr_mtu == dev->mtu) - return 0; + case SIOCSIFMTU: /* Set the MTU of a device */ + if (ifr->ifr_mtu == dev->mtu) + return 0; - /* - * MTU must be positive. - */ + /* + * MTU must be positive. + */ - if (ifr->ifr_mtu<0) - return -EINVAL; - - if (!netif_device_present(dev)) - return -ENODEV; - - if (dev->change_mtu) - err = dev->change_mtu(dev, ifr->ifr_mtu); - else { - dev->mtu = ifr->ifr_mtu; - err = 0; - } - if (!err && dev->flags&IFF_UP) - notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); - return err; - - case SIOCGIFHWADDR: - memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); - ifr->ifr_hwaddr.sa_family=dev->type; - return 0; + if (ifr->ifr_mtu<0) + return -EINVAL; + + if (!netif_device_present(dev)) + return -ENODEV; + + if (dev->change_mtu) + err = dev->change_mtu(dev, ifr->ifr_mtu); + else { + dev->mtu = ifr->ifr_mtu; + err = 0; + } + if (!err && dev->flags&IFF_UP) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); + return err; + + case SIOCGIFHWADDR: + memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); + ifr->ifr_hwaddr.sa_family=dev->type; + return 0; - case SIOCSIFHWADDR: - if (dev->set_mac_address == NULL) - return -EOPNOTSUPP; - if (ifr->ifr_hwaddr.sa_family!=dev->type) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - err = dev->set_mac_address(dev, 
&ifr->ifr_hwaddr); - if (!err) - notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); - return err; + case SIOCSIFHWADDR: + if (dev->set_mac_address == NULL) + return -EOPNOTSUPP; + if (ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + err = dev->set_mac_address(dev, &ifr->ifr_hwaddr); + if (!err) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return err; - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family!=dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); - notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start=dev->mem_start; - ifr->ifr_map.mem_end=dev->mem_end; - ifr->ifr_map.base_addr=dev->base_addr; - ifr->ifr_map.irq=dev->irq; - ifr->ifr_map.dma=dev->dma; - ifr->ifr_map.port=dev->if_port; - return 0; + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start=dev->mem_start; + ifr->ifr_map.mem_end=dev->mem_end; + ifr->ifr_map.base_addr=dev->base_addr; + ifr->ifr_map.irq=dev->irq; + ifr->ifr_map.dma=dev->dma; + ifr->ifr_map.port=dev->if_port; + return 0; - case SIOCSIFMAP: - if (dev->set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return dev->set_config(dev,&ifr->ifr_map); - } - return -EOPNOTSUPP; + case SIOCSIFMAP: + if (dev->set_config) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->set_config(dev,&ifr->ifr_map); + } + return -EOPNOTSUPP; - case SIOCADDMULTI: - if (dev->set_multicast_list == NULL || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); - return 0; - - case SIOCDELMULTI: - if 
(dev->set_multicast_list == NULL || - ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen<0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - if (dev->flags&IFF_UP) - return -EBUSY; - if (__dev_get_by_name(ifr->ifr_newname)) - return -EEXIST; - memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); - dev->name[IFNAMSIZ-1] = 0; - notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); - return 0; + case SIOCADDMULTI: + if (dev->set_multicast_list == NULL || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); + return 0; + + case SIOCDELMULTI: + if (dev->set_multicast_list == NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCSIFNAME: + if (dev->flags&IFF_UP) + return -EBUSY; + if (__dev_get_by_name(ifr->ifr_newname)) + return -EEXIST; + memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); + dev->name[IFNAMSIZ-1] = 0; + notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + return 0; #ifdef WIRELESS_EXT - case SIOCGIWSTATS: - return dev_iwstats(dev, ifr); + case SIOCGIWSTATS: + return dev_iwstats(dev, ifr); #endif /* WIRELESS_EXT */ - /* - * Unknown or private ioctl - */ - - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == 
SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCETHTOOL || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG) { - if (dev->do_ioctl) { - if (!netif_device_present(dev)) - return -ENODEV; - return dev->do_ioctl(dev, ifr, cmd); - } - return -EOPNOTSUPP; - } + /* + * Unknown or private ioctl + */ + + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCETHTOOL || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG) { + if (dev->do_ioctl) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->do_ioctl(dev, ifr, cmd); + } + return -EOPNOTSUPP; + } #ifdef WIRELESS_EXT - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - if (dev->do_ioctl) { - if (!netif_device_present(dev)) - return -ENODEV; - return dev->do_ioctl(dev, ifr, cmd); - } - return -EOPNOTSUPP; - } + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + if (dev->do_ioctl) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->do_ioctl(dev, ifr, cmd); + } + return -EOPNOTSUPP; + } #endif /* WIRELESS_EXT */ - } - return -EINVAL; + } + return -EINVAL; } /* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. */ /** @@ -1415,172 +1341,171 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) int dev_ioctl(unsigned int cmd, void *arg) { - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. 
- */ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ - if (cmd == SIOCGIFCONF) { - return -ENOSYS; - } - if (cmd == SIOCGIFNAME) { - return dev_ifname((struct ifreq *)arg); - } + if (cmd == SIOCGIFCONF) { + return -ENOSYS; + } + if (cmd == SIOCGIFNAME) { + return dev_ifname((struct ifreq *)arg); + } - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; - ifr.ifr_name[IFNAMSIZ-1] = 0; + ifr.ifr_name[IFNAMSIZ-1] = 0; - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; - /* - * See which interface the caller is talking about. - */ + /* + * See which interface the caller is talking about. + */ - switch(cmd) - { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ + switch(cmd) + { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); - read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); - read_unlock(&dev_base_lock); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. 
- * - return a value - */ + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + dev_load(ifr.ifr_name); + read_lock(&dev_base_lock); + ret = dev_ifsioc(&ifr, cmd); + read_unlock(&dev_base_lock); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ - case SIOCETHTOOL: - case SIOCGMIIPHY: - case SIOCGMIIREG: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - dev_load(ifr.ifr_name); - dev_probe_lock(); - rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); - rtnl_unlock(); - dev_probe_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ + case SIOCETHTOOL: + case SIOCGMIIPHY: + case SIOCGMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. 
+ * - do not return a value + */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFMAP: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: - case SIOCSIFNAME: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - case SIOCBONDCHANGEACTIVE: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - dev_load(ifr.ifr_name); - dev_probe_lock(); - rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); - rtnl_unlock(); - dev_probe_unlock(); - return ret; + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSIFNAME: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + case SIOCBONDCHANGEACTIVE: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + return ret; - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but currently - do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. Not applicable in our case */ - case SIOCSIFLINK: - return -EINVAL; - - /* - * Unknown or private ioctl. - */ + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but currently + do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. */ + case SIOCSIFLINK: + return -EINVAL; + + /* + * Unknown or private ioctl. 
+ */ - default: - if (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) { - dev_load(ifr.ifr_name); - dev_probe_lock(); - rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); - rtnl_unlock(); - dev_probe_unlock(); - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return ret; - } + default: + if (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; + } #ifdef WIRELESS_EXT - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { /* If command is `set a parameter', or * `get the encoding parameters', check if * the user has the right to do it */ - if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) { - if(!capable(CAP_NET_ADMIN)) - return -EPERM; - } - dev_load(ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); - rtnl_unlock(); - if (!ret && IW_IS_GET(cmd) && - copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return ret; - } + if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) { + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + } + dev_load(ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + if (!ret && IW_IS_GET(cmd) && + copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + return -EFAULT; + return ret; + } #endif /* WIRELESS_EXT */ - return -EINVAL; - } + return -EINVAL; + } } @@ -1594,13 +1519,13 @@ int dev_ioctl(unsigned int cmd, void *arg) int dev_new_index(void) { - static int ifindex; - for (;;) { - if (++ifindex <= 0) - ifindex=1; - if (__dev_get_by_index(ifindex) == NULL) - return ifindex; - } + static int ifindex; + for (;;) { + if (++ifindex <= 0) + ifindex=1; + if (__dev_get_by_index(ifindex) == NULL) + return ifindex; + } } static int 
dev_boot_phase = 1; @@ -1627,77 +1552,77 @@ int net_dev_init(void); int register_netdevice(struct net_device *dev) { - struct net_device *d, **dp; + struct net_device *d, **dp; #ifdef CONFIG_NET_DIVERT - int ret; + int ret; #endif - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); - dev->xmit_lock_owner = -1; + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->xmit_lock); + dev->xmit_lock_owner = -1; #ifdef CONFIG_NET_FASTROUTE - dev->fastpath_lock=RW_LOCK_UNLOCKED; + dev->fastpath_lock=RW_LOCK_UNLOCKED; #endif - if (dev_boot_phase) - net_dev_init(); + if (dev_boot_phase) + net_dev_init(); #ifdef CONFIG_NET_DIVERT - ret = alloc_divert_blk(dev); - if (ret) - return ret; + ret = alloc_divert_blk(dev); + if (ret) + return ret; #endif /* CONFIG_NET_DIVERT */ - dev->iflink = -1; + dev->iflink = -1; - /* Init, if this function is available */ - if (dev->init && dev->init(dev) != 0) { + /* Init, if this function is available */ + if (dev->init && dev->init(dev) != 0) { #ifdef CONFIG_NET_DIVERT - free_divert_blk(dev); + free_divert_blk(dev); #endif - return -EIO; - } + return -EIO; + } - dev->ifindex = dev_new_index(); - if (dev->iflink == -1) - dev->iflink = dev->ifindex; + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; - /* Check for existence, and append to tail of chain */ - for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { - if (d == dev || strcmp(d->name, dev->name) == 0) { + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) { #ifdef CONFIG_NET_DIVERT - free_divert_blk(dev); + free_divert_blk(dev); #endif - return -EEXIST; - } - } - /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (dev->rebuild_header == NULL) - dev->rebuild_header = default_rebuild_header; - - /* - * Default initial state at registry is that the - * device is present. 
- */ - - set_bit(__LINK_STATE_PRESENT, &dev->state); - - dev->next = NULL; - dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - *dp = dev; - dev_hold(dev); - dev->deadbeaf = 0; - write_unlock_bh(&dev_base_lock); - - /* Notify protocols, that a new device appeared. */ - notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - - return 0; + return -EEXIST; + } + } + /* + * nil rebuild_header routine, + * that should be never called and used as just bug trap. + */ + + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + + /* + * Default initial state at registry is that the + * device is present. + */ + + set_bit(__LINK_STATE_PRESENT, &dev->state); + + dev->next = NULL; + dev_init_scheduler(dev); + write_lock_bh(&dev_base_lock); + *dp = dev; + dev_hold(dev); + dev->deadbeaf = 0; + write_unlock_bh(&dev_base_lock); + + /* Notify protocols, that a new device appeared. */ + notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + + return 0; } /** @@ -1710,23 +1635,24 @@ int register_netdevice(struct net_device *dev) int netdev_finish_unregister(struct net_device *dev) { - BUG_TRAP(dev->ip_ptr==NULL); - BUG_TRAP(dev->ip6_ptr==NULL); - BUG_TRAP(dev->dn_ptr==NULL); - - if (!dev->deadbeaf) { - printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name); - return 0; - } + BUG_TRAP(dev->ip_ptr==NULL); + BUG_TRAP(dev->ip6_ptr==NULL); + BUG_TRAP(dev->dn_ptr==NULL); + + if (!dev->deadbeaf) { + printk(KERN_ERR "Freeing alive device %p, %s\n", + dev, dev->name); + return 0; + } #ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, - (dev->features & NETIF_F_DYNALLOC)?"":", old style"); + printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, + (dev->features & NETIF_F_DYNALLOC)?"":", old style"); #endif - if (dev->destructor) - dev->destructor(dev); - if (dev->features & NETIF_F_DYNALLOC) - kfree(dev); - return 0; + if (dev->destructor) + dev->destructor(dev); + if (dev->features & 
NETIF_F_DYNALLOC) + kfree(dev); + return 0; } /** @@ -1744,118 +1670,121 @@ int netdev_finish_unregister(struct net_device *dev) int unregister_netdevice(struct net_device *dev) { - unsigned long now, warning_time; - struct net_device *d, **dp; - - /* If device is running, close it first. */ - if (dev->flags & IFF_UP) - dev_close(dev); - - BUG_TRAP(dev->deadbeaf==0); - dev->deadbeaf = 1; - - /* And unlink it from device chain. */ - for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { - if (d == dev) { - write_lock_bh(&dev_base_lock); - *dp = d->next; - write_unlock_bh(&dev_base_lock); - break; - } - } - if (d == NULL) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev); - return -ENODEV; - } - - /* Synchronize to net_rx_action. */ - br_write_lock_bh(BR_NETPROTO_LOCK); - br_write_unlock_bh(BR_NETPROTO_LOCK); - - if (dev_boot_phase == 0) { - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); - - /* - * Flush the multicast chain - */ - dev_mc_discard(dev); - } - - if (dev->uninit) - dev->uninit(dev); - - /* Notifier chain MUST detach us from master device. */ - BUG_TRAP(dev->master==NULL); + unsigned long now, warning_time; + struct net_device *d, **dp; + + /* If device is running, close it first. */ + if (dev->flags & IFF_UP) + dev_close(dev); + + BUG_TRAP(dev->deadbeaf==0); + dev->deadbeaf = 1; + + /* And unlink it from device chain. */ + for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev) { + write_lock_bh(&dev_base_lock); + *dp = d->next; + write_unlock_bh(&dev_base_lock); + break; + } + } + if (d == NULL) { + printk(KERN_DEBUG "unregister_netdevice: device %s/%p" + " not registered\n", dev->name, dev); + return -ENODEV; + } + + /* Synchronize to net_rx_action. 
*/ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + if (dev_boot_phase == 0) { + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + } + + if (dev->uninit) + dev->uninit(dev); + + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(dev->master==NULL); #ifdef CONFIG_NET_DIVERT - free_divert_blk(dev); + free_divert_blk(dev); #endif - if (dev->features & NETIF_F_DYNALLOC) { + if (dev->features & NETIF_F_DYNALLOC) { #ifdef NET_REFCNT_DEBUG - if (atomic_read(&dev->refcnt) != 1) - printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1); + if (atomic_read(&dev->refcnt) != 1) + printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", + dev->name, atomic_read(&dev->refcnt)-1); #endif - dev_put(dev); - return 0; - } + dev_put(dev); + return 0; + } - /* Last reference is our one */ - if (atomic_read(&dev->refcnt) == 1) { - dev_put(dev); - return 0; - } + /* Last reference is our one */ + if (atomic_read(&dev->refcnt) == 1) { + dev_put(dev); + return 0; + } #ifdef NET_REFCNT_DEBUG - printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)); + printk("unregister_netdevice: waiting %s refcnt=%d\n", + dev->name, atomic_read(&dev->refcnt)); #endif - /* EXPLANATION. If dev->refcnt is not now 1 (our own reference) - it means that someone in the kernel still has a reference - to this device and we cannot release it. - - "New style" devices have destructors, hence we can return from this - function and destructor will do all the work later. As of kernel 2.4.0 - there are very few "New Style" devices. - - "Old style" devices expect that the device is free of any references - upon exit from this function. 
- We cannot return from this function until all such references have - fallen away. This is because the caller of this function will probably - immediately kfree(*dev) and then be unloaded via sys_delete_module. - - So, we linger until all references fall away. The duration of the - linger is basically unbounded! It is driven by, for example, the - current setting of sysctl_ipfrag_time. - - After 1 second, we start to rebroadcast unregister notifications - in hope that careless clients will release the device. - - */ - - now = warning_time = jiffies; - while (atomic_read(&dev->refcnt) != 1) { - if ((jiffies - now) > 1*HZ) { - /* Rebroadcast unregister notification */ - notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); - } - mdelay(250); - if ((jiffies - warning_time) > 10*HZ) { - printk(KERN_EMERG "unregister_netdevice: waiting for %s to " - "become free. Usage count = %d\n", - dev->name, atomic_read(&dev->refcnt)); - warning_time = jiffies; - } - } - dev_put(dev); - return 0; + /* EXPLANATION. If dev->refcnt is not now 1 (our own reference) + it means that someone in the kernel still has a reference + to this device and we cannot release it. + + "New style" devices have destructors, hence we can return from this + function and destructor will do all the work later. As of kernel 2.4.0 + there are very few "New Style" devices. + + "Old style" devices expect that the device is free of any references + upon exit from this function. + We cannot return from this function until all such references have + fallen away. This is because the caller of this function will probably + immediately kfree(*dev) and then be unloaded via sys_delete_module. + + So, we linger until all references fall away. The duration of the + linger is basically unbounded! It is driven by, for example, the + current setting of sysctl_ipfrag_time. + + After 1 second, we start to rebroadcast unregister notifications + in hope that careless clients will release the device. 
+ + */ + + now = warning_time = jiffies; + while (atomic_read(&dev->refcnt) != 1) { + if ((jiffies - now) > 1*HZ) { + /* Rebroadcast unregister notification */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + } + mdelay(250); + if ((jiffies - warning_time) > 10*HZ) { + printk(KERN_EMERG "unregister_netdevice: waiting for %s to " + "become free. Usage count = %d\n", + dev->name, atomic_read(&dev->refcnt)); + warning_time = jiffies; + } + } + dev_put(dev); + return 0; } @@ -1879,128 +1808,115 @@ extern void dv_init(void); */ int __init net_dev_init(void) { - struct net_device *dev, **dp; - int i; + struct net_device *dev, **dp; + int i; - if (!dev_boot_phase) - return 0; + if ( !dev_boot_phase ) + return 0; + + /* KAF: was sone in socket_init, but that top-half stuff is gone. */ + skb_init(); + + /* Initialise the packet receive queues. */ + for ( i = 0; i < NR_CPUS; i++ ) + { + struct softnet_data *queue; + queue = &softnet_data[i]; + queue->completion_queue = NULL; + } + + /* + * Add the devices. + * If the call to dev->init fails, the dev is removed + * from the chain disconnecting the device until the + * next reboot. + * + * NB At boot phase networking is dead. No locking is required. + * But we still preserve dev_base_lock for sanity. + */ + dp = &dev_base; + while ((dev = *dp) != NULL) { + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->xmit_lock); + + dev->xmit_lock_owner = -1; + dev->iflink = -1; + dev_hold(dev); /* - * KAF: was sone in socket_init, but that top-half stuff is gone. + * Allocate name. If the init() fails + * the name will be reissued correctly. */ - skb_init(); - - /* - * Initialise the packet receive queues. - */ - - for (i = 0; i < NR_CPUS; i++) { - struct softnet_data *queue; - - queue = &softnet_data[i]; - skb_queue_head_init(&queue->input_pkt_queue); - queue->throttle = 0; - queue->cng_level = 0; - queue->avg_blog = 10; /* arbitrary non-zero */ - queue->completion_queue = NULL; - } - - /* - * Add the devices. 
- * If the call to dev->init fails, the dev is removed - * from the chain disconnecting the device until the - * next reboot. - * - * NB At boot phase networking is dead. No locking is required. - * But we still preserve dev_base_lock for sanity. - */ - - dp = &dev_base; - while ((dev = *dp) != NULL) { - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); - - dev->xmit_lock_owner = -1; - dev->iflink = -1; - dev_hold(dev); - - /* - * Allocate name. If the init() fails - * the name will be reissued correctly. - */ - if (strchr(dev->name, '%')) - dev_alloc_name(dev, dev->name); - - if (dev->init && dev->init(dev)) { - /* - * It failed to come up. It will be unhooked later. - * dev_alloc_name can now advance to next suitable - * name that is checked next. - */ - dev->deadbeaf = 1; - dp = &dev->next; - } else { - dp = &dev->next; - dev->ifindex = dev_new_index(); - if (dev->iflink == -1) - dev->iflink = dev->ifindex; - if (dev->rebuild_header == NULL) - dev->rebuild_header = default_rebuild_header; - dev_init_scheduler(dev); - set_bit(__LINK_STATE_PRESENT, &dev->state); - } - } - - /* - * Unhook devices that failed to come up - */ - dp = &dev_base; - while ((dev = *dp) != NULL) { - if (dev->deadbeaf) { - write_lock_bh(&dev_base_lock); - *dp = dev->next; - write_unlock_bh(&dev_base_lock); - dev_put(dev); - } else { - dp = &dev->next; - } - } - - dev_boot_phase = 0; - - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - //open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); - - dst_init(); - dev_mcast_init(); + if (strchr(dev->name, '%')) + dev_alloc_name(dev, dev->name); + + if (dev->init && dev->init(dev)) { + /* + * It failed to come up. It will be unhooked later. + * dev_alloc_name can now advance to next suitable + * name that is checked next. 
+ */ + dev->deadbeaf = 1; + dp = &dev->next; + } else { + dp = &dev->next; + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + dev_init_scheduler(dev); + set_bit(__LINK_STATE_PRESENT, &dev->state); + } + } + + /* + * Unhook devices that failed to come up + */ + dp = &dev_base; + while ((dev = *dp) != NULL) { + if (dev->deadbeaf) { + write_lock_bh(&dev_base_lock); + *dp = dev->next; + write_unlock_bh(&dev_base_lock); + dev_put(dev); + } else { + dp = &dev->next; + } + } + + dev_boot_phase = 0; + + dst_init(); + dev_mcast_init(); #ifdef CONFIG_NET_SCHED - pktsched_init(); + pktsched_init(); #endif - /* - * Initialise network devices - */ + /* + * Initialise network devices + */ - net_device_init(); + net_device_init(); - return 0; + return 0; } inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev) { - memcpy(data + ETH_ALEN, dev->dev_addr, ETH_ALEN); + memcpy(data + ETH_ALEN, dev->dev_addr, ETH_ALEN); - switch ( ntohs(*(unsigned short *)(data + 12)) ) - { - case ETH_P_ARP: - if ( len < 42 ) break; - memcpy(data + 22, dev->dev_addr, 6); - return ETH_P_ARP; - case ETH_P_IP: - return ETH_P_IP; - } - return 0; + switch ( ntohs(*(unsigned short *)(data + 12)) ) + { + case ETH_P_ARP: + if ( len < 42 ) break; + memcpy(data + 22, dev->dev_addr, 6); + return ETH_P_ARP; + case ETH_P_IP: + return ETH_P_IP; + } + return 0; } diff --git a/xen-2.4.16/net/devinit.c b/xen-2.4.16/net/devinit.c new file mode 100644 index 0000000000..7770a0e7a4 --- /dev/null +++ b/xen-2.4.16/net/devinit.c @@ -0,0 +1,114 @@ +/****************************************************************************** + * devinit.c + * + * This is the watchdog timer routines, ripped from sch_generic.c + * Original copyright notice appears below. 
+ * + */ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * Jamal Hadi Salim, 990601 + * - Ingress support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void dev_watchdog(unsigned long arg) +{ + struct net_device *dev = (struct net_device *)arg; + + spin_lock(&dev->xmit_lock); + if (netif_device_present(dev) && + netif_running(dev) && + netif_carrier_ok(dev)) { + if (netif_queue_stopped(dev) && + (jiffies - dev->trans_start) > dev->watchdog_timeo) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name); + dev->tx_timeout(dev); + } + if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + dev_hold(dev); + } + spin_unlock(&dev->xmit_lock); + + dev_put(dev); +} + +static void dev_watchdog_init(struct net_device *dev) +{ + init_timer(&dev->watchdog_timer); + dev->watchdog_timer.data = (unsigned long)dev; + dev->watchdog_timer.function = dev_watchdog; +} + +void __netdev_watchdog_up(struct net_device *dev) +{ + if (dev->tx_timeout) { + if (dev->watchdog_timeo <= 0) + dev->watchdog_timeo = 5*HZ; + if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + dev_hold(dev); + } +} + +static void dev_watchdog_up(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + __netdev_watchdog_up(dev); + spin_unlock_bh(&dev->xmit_lock); +} + +static void dev_watchdog_down(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + if (del_timer(&dev->watchdog_timer)) + __dev_put(dev); + spin_unlock_bh(&dev->xmit_lock); +} + +void dev_activate(struct net_device *dev) +{ + spin_lock_bh(&dev->queue_lock); + dev->trans_start = jiffies; + dev_watchdog_up(dev); + 
spin_unlock_bh(&dev->queue_lock); +} + +void dev_deactivate(struct net_device *dev) +{ + dev_watchdog_down(dev); + + while (test_bit(__LINK_STATE_SCHED, &dev->state)) { + current->policy |= SCHED_YIELD; + schedule(); + } +} + +void dev_init_scheduler(struct net_device *dev) +{ + dev_watchdog_init(dev); +} + +void dev_shutdown(struct net_device *dev) +{ +} diff --git a/xen-2.4.16/net/sch_generic.c b/xen-2.4.16/net/sch_generic.c deleted file mode 100644 index 135142fe52..0000000000 --- a/xen-2.4.16/net/sch_generic.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * net/sched/sch_generic.c Generic packet scheduler routines. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, - * Jamal Hadi Salim, 990601 - * - Ingress support - */ - -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -//#include -#include -//#include -#include - -extern int net_ratelimit(void); -#define BUG_TRAP ASSERT - -/* Main transmission queue. */ - -/* Main qdisc structure lock. - - However, modifications - to data, participating in scheduling must be additionally - protected with dev->queue_lock spinlock. - - The idea is the following: - - enqueue, dequeue are serialized via top level device - spinlock dev->queue_lock. - - tree walking is protected by read_lock(qdisc_tree_lock) - and this lock is used only in process context. - - updates to tree are made only under rtnl semaphore, - hence this lock may be made without local bh disabling. - - qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! - */ -rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED; - -/* - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. 
- - dev->xmit_lock serializes accesses to device driver. - - dev->queue_lock and dev->xmit_lock are mutually exclusive, - if one is grabbed, another must be free. - */ - - -/* Kick device. - Note, that this procedure can be called by a watchdog timer, so that - we do not check dev->tbusy flag here. - - Returns: 0 - queue is empty. - >0 - queue is not empty, but throttled. - <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. - - NOTE: Called under dev->queue_lock with locally disabled BH. -*/ - -int qdisc_restart(struct net_device *dev) -{ - struct Qdisc *q = dev->qdisc; - struct sk_buff *skb; - - /* Dequeue packet */ - if ((skb = q->dequeue(q)) != NULL) { - if (spin_trylock(&dev->xmit_lock)) { - /* Remember that the driver is grabbed by us. */ - dev->xmit_lock_owner = smp_processor_id(); - - /* And release queue */ - spin_unlock(&dev->queue_lock); - - if (!netif_queue_stopped(dev)) { -#if 0 - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); -#endif - - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); - - spin_lock(&dev->queue_lock); - return -1; - } - } - - /* Release the driver */ - dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); - spin_lock(&dev->queue_lock); - q = dev->qdisc; - } else { - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. - */ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - return -1; - } - netdev_rx_stat[smp_processor_id()].cpu_collision++; - } - - /* Device kicked us out :( - This is possible in three cases: - - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. 
device is buggy (ppp) - */ - - q->ops->requeue(skb, q); - netif_schedule(dev); - return 1; - } - return q->q.qlen; -} - -static void dev_watchdog(unsigned long arg) -{ - struct net_device *dev = (struct net_device *)arg; - - spin_lock(&dev->xmit_lock); - if (dev->qdisc != &noop_qdisc) { - if (netif_device_present(dev) && - netif_running(dev) && - netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - (jiffies - dev->trans_start) > dev->watchdog_timeo) { - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name); - dev->tx_timeout(dev); - } - if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) - dev_hold(dev); - } - } - spin_unlock(&dev->xmit_lock); - - dev_put(dev); -} - -static void dev_watchdog_init(struct net_device *dev) -{ - init_timer(&dev->watchdog_timer); - dev->watchdog_timer.data = (unsigned long)dev; - dev->watchdog_timer.function = dev_watchdog; -} - -void __netdev_watchdog_up(struct net_device *dev) -{ - if (dev->tx_timeout) { - if (dev->watchdog_timeo <= 0) - dev->watchdog_timeo = 5*HZ; - if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) - dev_hold(dev); - } -} - -static void dev_watchdog_up(struct net_device *dev) -{ - spin_lock_bh(&dev->xmit_lock); - __netdev_watchdog_up(dev); - spin_unlock_bh(&dev->xmit_lock); -} - -static void dev_watchdog_down(struct net_device *dev) -{ - spin_lock_bh(&dev->xmit_lock); - if (del_timer(&dev->watchdog_timer)) - __dev_put(dev); - spin_unlock_bh(&dev->xmit_lock); -} - -/* "NOOP" scheduler: the best scheduler, recommended for all interfaces - under all circumstances. It is difficult to invent anything faster or - cheaper. - */ - -static int -noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) -{ - kfree_skb(skb); - return NET_XMIT_CN; -} - -static struct sk_buff * -noop_dequeue(struct Qdisc * qdisc) -{ - return NULL; -} - -static int -noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. 
It is buggy.\n", skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - -struct Qdisc_ops noop_qdisc_ops = -{ - NULL, - NULL, - "noop", - 0, - - noop_enqueue, - noop_dequeue, - noop_requeue, -}; - -struct Qdisc noop_qdisc = -{ - noop_enqueue, - noop_dequeue, - TCQ_F_BUILTIN, - &noop_qdisc_ops, -}; - - -struct Qdisc_ops noqueue_qdisc_ops = -{ - NULL, - NULL, - "noqueue", - 0, - - noop_enqueue, - noop_dequeue, - noop_requeue, - -}; - -struct Qdisc noqueue_qdisc = -{ - NULL, - noop_dequeue, - TCQ_F_BUILTIN, - &noqueue_qdisc_ops, -}; - - -static const u8 prio2band[TC_PRIO_MAX+1] = -{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; - -/* 3-band FIFO queue: old style, but should be a bit faster than - generic prio+fifo combination. - */ - -static int -pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - struct sk_buff_head *list; - - list = ((struct sk_buff_head*)qdisc->data) + - prio2band[skb->priority&TC_PRIO_MAX]; - - if (list->qlen <= skb->dev->tx_queue_len) { - __skb_queue_tail(list, skb); - qdisc->q.qlen++; - return 0; - } - //qdisc->stats.drops++; - kfree_skb(skb); - return NET_XMIT_DROP; -} - -static struct sk_buff * -pfifo_fast_dequeue(struct Qdisc* qdisc) -{ - int prio; - struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); - struct sk_buff *skb; - - for (prio = 0; prio < 3; prio++, list++) { - skb = __skb_dequeue(list); - if (skb) { - qdisc->q.qlen--; - return skb; - } - } - return NULL; -} - -static int -pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - struct sk_buff_head *list; - - list = ((struct sk_buff_head*)qdisc->data) + - prio2band[skb->priority&TC_PRIO_MAX]; - - __skb_queue_head(list, skb); - qdisc->q.qlen++; - return 0; -} - -static void -pfifo_fast_reset(struct Qdisc* qdisc) -{ - int prio; - struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); - - for (prio=0; prio < 3; prio++) - skb_queue_purge(list+prio); - qdisc->q.qlen = 0; -} - -static int pfifo_fast_init(struct Qdisc *qdisc, 
struct rtattr *opt) -{ - int i; - struct sk_buff_head *list; - - list = ((struct sk_buff_head*)qdisc->data); - - for (i=0; i<3; i++) - skb_queue_head_init(list+i); - - return 0; -} - -static struct Qdisc_ops pfifo_fast_ops = -{ - NULL, - NULL, - "pfifo_fast", - 3 * sizeof(struct sk_buff_head), - - pfifo_fast_enqueue, - pfifo_fast_dequeue, - pfifo_fast_requeue, - NULL, - - pfifo_fast_init, - pfifo_fast_reset, -}; - -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) -{ - struct Qdisc *sch; - int size = sizeof(*sch) + ops->priv_size; - - sch = kmalloc(size, GFP_KERNEL); - if (!sch) - return NULL; - memset(sch, 0, size); - - skb_queue_head_init(&sch->q); - sch->ops = ops; - sch->enqueue = ops->enqueue; - sch->dequeue = ops->dequeue; - sch->dev = dev; - //sch->stats.lock = &dev->queue_lock; - atomic_set(&sch->refcnt, 1); - if (!ops->init || ops->init(sch, NULL) == 0) - return sch; - - kfree(sch); - return NULL; -} - -/* Under dev->queue_lock and BH! */ - -void qdisc_reset(struct Qdisc *qdisc) -{ - struct Qdisc_ops *ops = qdisc->ops; - - if (ops->reset) - ops->reset(qdisc); -} - -/* Under dev->queue_lock and BH! */ - -void qdisc_destroy(struct Qdisc *qdisc) -{ - struct Qdisc_ops *ops = qdisc->ops; - struct net_device *dev; - - if (!atomic_dec_and_test(&qdisc->refcnt)) - return; - - dev = qdisc->dev; - -#ifdef CONFIG_NET_SCHED - if (dev) { - struct Qdisc *q, **qp; - for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) { - if (q == qdisc) { - *qp = q->next; - break; - } - } - } -#ifdef CONFIG_NET_ESTIMATOR - qdisc_kill_estimator(&qdisc->stats); -#endif -#endif - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - if (!(qdisc->flags&TCQ_F_BUILTIN)) - kfree(qdisc); -} - - -void dev_activate(struct net_device *dev) -{ - /* No queueing discipline is attached to device; - create default one i.e. 
pfifo_fast for devices, - which need queueing and noqueue_qdisc for - virtual interfaces - */ - - if (dev->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - } else { - qdisc = &noqueue_qdisc; - } - write_lock(&qdisc_tree_lock); - dev->qdisc_sleeping = qdisc; - write_unlock(&qdisc_tree_lock); - } - - spin_lock_bh(&dev->queue_lock); - if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) { - dev->trans_start = jiffies; - dev_watchdog_up(dev); - } - spin_unlock_bh(&dev->queue_lock); -} - -void dev_deactivate(struct net_device *dev) -{ - struct Qdisc *qdisc; - - spin_lock_bh(&dev->queue_lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; - - qdisc_reset(qdisc); - - spin_unlock_bh(&dev->queue_lock); - - dev_watchdog_down(dev); - - while (test_bit(__LINK_STATE_SCHED, &dev->state)) { - current->policy |= SCHED_YIELD; - schedule(); - } - - spin_unlock_wait(&dev->xmit_lock); -} - -void dev_init_scheduler(struct net_device *dev) -{ - write_lock(&qdisc_tree_lock); - spin_lock_bh(&dev->queue_lock); - dev->qdisc = &noop_qdisc; - spin_unlock_bh(&dev->queue_lock); - dev->qdisc_sleeping = &noop_qdisc; - dev->qdisc_list = NULL; - write_unlock(&qdisc_tree_lock); - - dev_watchdog_init(dev); -} - -void dev_shutdown(struct net_device *dev) -{ - struct Qdisc *qdisc; - - write_lock(&qdisc_tree_lock); - spin_lock_bh(&dev->queue_lock); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; - qdisc_destroy(qdisc); - } -#endif - BUG_TRAP(dev->qdisc_list == NULL); - BUG_TRAP(!timer_pending(&dev->watchdog_timer)); - dev->qdisc_list = NULL; - spin_unlock_bh(&dev->queue_lock); - 
write_unlock(&qdisc_tree_lock); -} diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c index ec9b59bdbd..0a4f2639e8 100644 --- a/xen-2.4.16/net/skbuff.c +++ b/xen-2.4.16/net/skbuff.c @@ -40,26 +40,14 @@ #include #include #include -//#include #include #include #include -//#include -//#include #include #include -//#include #include #include #include -//#include - -//#include -//#include -//#include -//#include -//#include -//#include #include #include @@ -162,7 +150,7 @@ static inline u8 *alloc_skb_data_page(struct sk_buff *skb) list_ptr = free_list.next; pf = list_entry(list_ptr, struct pfn_info, list); - pf->flags = 0; // owned by dom0 + pf->flags = 0; /* owned by dom0 */ list_del(&pf->list); free_pfns--; @@ -218,14 +206,9 @@ struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) if (data == NULL) goto nodata; - // This is so that pci_map_single does the right thing in the driver. - // If you want to ever use this pointer otherwise, you need to regenerate it - // based on skb->pf. + /* A FAKE virtual address, so that pci_map_xxx dor the right thing. */ data = phys_to_virt((unsigned long)data); - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - /* Load the data pointers. */ skb->head = data; skb->data = data; @@ -302,9 +285,6 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) if (data == NULL) goto nodata; - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - /* Load the data pointers. 
*/ skb->head = data; skb->data = data; @@ -343,15 +323,9 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache, skb->next = NULL; skb->prev = NULL; skb->list = NULL; - skb->sk = NULL; - skb->stamp.tv_sec=0; /* No idea about time */ skb->dev = NULL; -// skb->dst = NULL; - memset(skb->cb, 0, sizeof(skb->cb)); skb->pkt_type = PACKET_HOST; /* Default type */ skb->ip_summed = 0; - skb->priority = 0; - skb->security = 0; /* By default packets are insecure */ skb->destructor = NULL; #ifdef CONFIG_NETFILTER @@ -411,7 +385,7 @@ static void skb_release_data(struct sk_buff *skb) } else { - BUG(); //skb_release_data called with unknown skb type! + BUG(); } } } @@ -442,7 +416,6 @@ void __kfree_skb(struct sk_buff *skb) BUG(); } -// dst_release(skb->dst); if(skb->destructor) { if (in_irq()) { printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", @@ -487,26 +460,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) n->next = n->prev = NULL; n->list = NULL; - n->sk = NULL; - C(stamp); C(dev); C(h); C(nh); C(mac); -// C(dst); -// dst_clone(n->dst); - memcpy(n->cb, skb->cb, sizeof(skb->cb)); C(len); C(data_len); C(csum); n->cloned = 1; C(pkt_type); C(ip_summed); - C(priority); atomic_set(&n->users, 1); C(protocol); - C(security); - C(truesize); C(head); C(data); C(tail); @@ -543,20 +508,14 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) unsigned long offset = new->data - old->data; new->list=NULL; - new->sk=NULL; new->dev=old->dev; - new->priority=old->priority; new->protocol=old->protocol; -// new->dst=dst_clone(old->dst); new->h.raw=old->h.raw+offset; new->nh.raw=old->nh.raw+offset; new->mac.raw=old->mac.raw+offset; - memcpy(new->cb, old->cb, sizeof(old->cb)); atomic_set(&new->users, 1); new->pkt_type=old->pkt_type; - new->stamp=old->stamp; new->destructor = NULL; - new->security=old->security; #ifdef CONFIG_NETFILTER new->nfmark=old->nfmark; new->nfcache=old->nfcache; diff --git a/xen-2.4.16/net/utils.c 
b/xen-2.4.16/net/utils.c deleted file mode 100644 index 4cf312ab4b..0000000000 --- a/xen-2.4.16/net/utils.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Generic address resultion entity - * - * Authors: - * net_random Alan Cox - * net_ratelimit Andy Kleen - * - * Created by Alexey Kuznetsov - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -//#include -#include -//#include -#include - -static unsigned long net_rand_seed = 152L; - -unsigned long net_random(void) -{ - net_rand_seed=net_rand_seed*69069L+1; - return net_rand_seed^jiffies; -} - -void net_srandom(unsigned long entropy) -{ - net_rand_seed ^= entropy; - net_random(); -} - -int net_msg_cost = 5*HZ; -int net_msg_burst = 10*5*HZ; - -/* - * This enforces a rate limit: not more than one kernel message - * every 5secs to make a denial-of-service attack impossible. - * - * All warning printk()s should be guarded by this function. - */ -int net_ratelimit(void) -{ - static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; - static unsigned long toks = 10*5*HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; - - spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > net_msg_burst) - toks = net_msg_burst; - if (toks >= net_msg_cost) { - int lost = missed; - missed = 0; - toks -= net_msg_cost; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "NET: %d messages suppressed.\n", lost); - return 1; - } - missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; -} -- 2.30.2